@@ -1886,6 +1886,7 @@ class BoUpSLP {
18861886 void deleteTree() {
18871887 VectorizableTree.clear();
18881888 ScalarToTreeEntries.clear();
1889+ OperandsToTreeEntry.clear();
18891890 ScalarsInSplitNodes.clear();
18901891 MustGather.clear();
18911892 NonScheduledFirst.clear();
@@ -3401,54 +3402,23 @@ class BoUpSLP {
34013402 const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
34023403 bool &IsProfitableToDemote, bool IsTruncRoot) const;
34033404
3404- /// Check if the operands on the edges \p Edges of the \p UserTE allows
3405- /// reordering (i.e. the operands can be reordered because they have only one
3406- /// user and reordarable).
3405+ /// Builds the list of reorderable operands on the edges \p Edges of the \p
3406+ /// UserTE, which allow reordering (i.e. the operands can be reordered because
3407+ /// they have only one user and are reorderable).
34073408 /// \param ReorderableGathers List of all gather nodes that require reordering
34083409 /// (e.g., gather of extractelements or partially vectorizable loads).
34093410 /// \param GatherOps List of gather operand nodes for \p UserTE that require
34103411 /// reordering, subset of \p NonVectorized.
3411- bool
3412- canReorderOperands( TreeEntry *UserTE,
3413- SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3414- ArrayRef< TreeEntry *> ReorderableGathers,
3415- SmallVectorImpl<TreeEntry *> &GatherOps);
3412+ void buildReorderableOperands(
3413+ TreeEntry *UserTE,
3414+ SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3415+ const SmallPtrSetImpl<const TreeEntry *> & ReorderableGathers,
3416+ SmallVectorImpl<TreeEntry *> &GatherOps);
34163417
34173418 /// Checks if the given \p TE is a gather node with clustered reused scalars
34183419 /// and reorders it per given \p Mask.
34193420 void reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const;
34203421
3421- /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3422- /// if any. If it is not vectorized (gather node), returns nullptr.
3423- TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
3424- ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
3425- TreeEntry *TE = nullptr;
3426- const auto *It = find_if(VL, [&](Value *V) {
3427- if (!isa<Instruction>(V))
3428- return false;
3429- for (TreeEntry *E : getTreeEntries(V)) {
3430- if (E->UserTreeIndex == EdgeInfo(UserTE, OpIdx)) {
3431- TE = E;
3432- return true;
3433- }
3434- }
3435- return false;
3436- });
3437- if (It != VL.end()) {
3438- assert(TE->isSame(VL) && "Expected same scalars.");
3439- return TE;
3440- }
3441- return nullptr;
3442- }
3443-
3444- /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3445- /// if any. If it is not vectorized (gather node), returns nullptr.
3446- const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
3447- unsigned OpIdx) const {
3448- return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
3449- const_cast<TreeEntry *>(UserTE), OpIdx);
3450- }
3451-
34523422 /// Checks if all users of \p I are the part of the vectorization tree.
34533423 bool areAllUsersVectorized(
34543424 Instruction *I,
@@ -3509,19 +3479,6 @@ class BoUpSLP {
35093479 /// Vectorize a single entry in the tree.
35103480 Value *vectorizeTree(TreeEntry *E);
35113481
3512- /// Returns vectorized operand node, that matches the order of the scalars
3513- /// operand number \p NodeIdx in entry \p E.
3514- TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3515- ArrayRef<Value *> VL,
3516- const InstructionsState &S);
3517- const TreeEntry *
3518- getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3519- ArrayRef<Value *> VL,
3520- const InstructionsState &S) const {
3521- return const_cast<BoUpSLP *>(this)->getMatchedVectorizedOperand(E, NodeIdx,
3522- VL, S);
3523- }
3524-
35253482 /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
35263483 /// \p E.
35273484 Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
@@ -3715,11 +3672,6 @@ class BoUpSLP {
37153672 return IsSame(Scalars, ReuseShuffleIndices);
37163673 }
37173674
3718- bool isOperandGatherNode(const EdgeInfo &UserEI) const {
3719- return isGather() && UserTreeIndex.EdgeIdx == UserEI.EdgeIdx &&
3720- UserTreeIndex.UserTE == UserEI.UserTE;
3721- }
3722-
37233675 /// \returns true if current entry has same operands as \p TE.
37243676 bool hasEqualOperands(const TreeEntry &TE) const {
37253677 if (TE.getNumOperands() != getNumOperands())
@@ -4107,6 +4059,9 @@ class BoUpSLP {
41074059 TreeEntry *Last = VectorizableTree.back().get();
41084060 Last->Idx = VectorizableTree.size() - 1;
41094061 Last->State = EntryState;
4062+ if (UserTreeIdx.UserTE)
4063+ OperandsToTreeEntry.try_emplace(
4064+ std::make_pair(UserTreeIdx.UserTE, UserTreeIdx.EdgeIdx), Last);
41104065 // FIXME: Remove once support for ReuseShuffleIndices has been implemented
41114066 // for non-power-of-two vectors.
41124067 assert(
@@ -4298,6 +4253,10 @@ class BoUpSLP {
42984253 /// Maps a specific scalar to its tree entry(ies).
42994254 SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
43004255
4256+ /// Maps a (user tree entry, operand index) pair to the operand's tree entry.
4257+ SmallDenseMap<std::pair<const TreeEntry *, unsigned>, TreeEntry *>
4258+ OperandsToTreeEntry;
4259+
43014260 /// Scalars, used in split vectorize nodes.
43024261 SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarsInSplitNodes;
43034262
@@ -7411,11 +7370,11 @@ void BoUpSLP::reorderTopToBottom() {
74117370 }
74127371}
74137372
7414- bool BoUpSLP::canReorderOperands (
7373+ void BoUpSLP::buildReorderableOperands (
74157374 TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
7416- ArrayRef< TreeEntry *> ReorderableGathers,
7375+ const SmallPtrSetImpl<const TreeEntry *> & ReorderableGathers,
74177376 SmallVectorImpl<TreeEntry *> &GatherOps) {
7418- for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I ) {
7377+ for (unsigned I : seq<unsigned>( UserTE->getNumOperands()) ) {
74197378 if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
74207379 return OpData.first == I &&
74217380 (OpData.second->State == TreeEntry::Vectorize ||
@@ -7424,7 +7383,25 @@ bool BoUpSLP::canReorderOperands(
74247383 OpData.second->State == TreeEntry::SplitVectorize);
74257384 }))
74267385 continue;
7427- if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
7386+ // Do not request operands, if they do not exist.
7387+ if (UserTE->hasState()) {
7388+ if (UserTE->getOpcode() == Instruction::ExtractElement ||
7389+ UserTE->getOpcode() == Instruction::ExtractValue)
7390+ continue;
7391+ if (UserTE->getOpcode() == Instruction::InsertElement && I == 0)
7392+ continue;
7393+ if (UserTE->getOpcode() == Instruction::Store &&
7394+ UserTE->State == TreeEntry::Vectorize && I == 1)
7395+ continue;
7396+ if (UserTE->getOpcode() == Instruction::Load &&
7397+ (UserTE->State == TreeEntry::Vectorize ||
7398+ UserTE->State == TreeEntry::StridedVectorize ||
7399+ UserTE->State == TreeEntry::CompressVectorize))
7400+ continue;
7401+ }
7402+ TreeEntry *TE = getOperandEntry(UserTE, I);
7403+ assert(TE && "Expected operand entry.");
7404+ if (!TE->isGather()) {
74287405 // Add the node to the list of the ordered nodes with the identity
74297406 // order.
74307407 Edges.emplace_back(I, TE);
@@ -7433,37 +7410,14 @@ bool BoUpSLP::canReorderOperands(
74337410 // simply add to the list of gathered ops.
74347411 // If there are reused scalars, process this node as a regular vectorize
74357412 // node, just reorder reuses mask.
7436- if (TE->State != TreeEntry::Vectorize &&
7437- TE->State != TreeEntry::StridedVectorize &&
7438- TE->State != TreeEntry::CompressVectorize &&
7439- TE->State != TreeEntry::SplitVectorize &&
7413+ if (TE->State == TreeEntry::ScatterVectorize &&
74407414 TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
74417415 GatherOps.push_back(TE);
74427416 continue;
74437417 }
7444- TreeEntry *Gather = nullptr;
7445- if (count_if(ReorderableGathers,
7446- [&Gather, UserTE, I](TreeEntry *TE) {
7447- assert(TE->State != TreeEntry::Vectorize &&
7448- TE->State != TreeEntry::StridedVectorize &&
7449- TE->State != TreeEntry::CompressVectorize &&
7450- TE->State != TreeEntry::SplitVectorize &&
7451- "Only non-vectorized nodes are expected.");
7452- if (TE->UserTreeIndex.UserTE == UserTE &&
7453- TE->UserTreeIndex.EdgeIdx == I) {
7454- assert(TE->isSame(UserTE->getOperand(I)) &&
7455- "Operand entry does not match operands.");
7456- Gather = TE;
7457- return true;
7458- }
7459- return false;
7460- }) > 1 &&
7461- !allConstant(UserTE->getOperand(I)))
7462- return false;
7463- if (Gather)
7464- GatherOps.push_back(Gather);
7418+ if (ReorderableGathers.contains(TE))
7419+ GatherOps.push_back(TE);
74657420 }
7466- return true;
74677421}
74687422
74697423void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
@@ -7479,13 +7433,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
74797433 // Find all reorderable leaf nodes with the given VF.
74807434 // Currently they are vectorized loads, extracts without alternate operands +
74817435 // some gathering of extracts.
7482- SmallVector< TreeEntry *> NonVectorized;
7436+ SmallPtrSet<const TreeEntry *, 4 > NonVectorized;
74837437 for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
74847438 if (TE->State != TreeEntry::Vectorize &&
74857439 TE->State != TreeEntry::StridedVectorize &&
74867440 TE->State != TreeEntry::CompressVectorize &&
74877441 TE->State != TreeEntry::SplitVectorize)
7488- NonVectorized.push_back (TE.get());
7442+ NonVectorized.insert (TE.get());
74897443 if (std::optional<OrdersType> CurrentOrder =
74907444 getReorderingData(*TE, /*TopToBottom=*/false, IgnoreReorder)) {
74917445 Queue.push(TE.get());
@@ -7584,11 +7538,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
75847538 }
75857539 // Check that operands are used only in the User node.
75867540 SmallVector<TreeEntry *> GatherOps;
7587- if (!canReorderOperands(Data.first, Data.second, NonVectorized,
7588- GatherOps)) {
7589- Visited.insert_range(llvm::make_second_range(Data.second));
7590- continue;
7591- }
7541+ buildReorderableOperands(Data.first, Data.second, NonVectorized,
7542+ GatherOps);
75927543 // All operands are reordered and used only in this node - propagate the
75937544 // most used order to the user node.
75947545 MapVector<OrdersType, unsigned,
@@ -12916,33 +12867,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1291612867
1291712868const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
1291812869 unsigned Idx) const {
12919- ArrayRef<Value *> VL = E->getOperand(Idx);
12920- InstructionsState S = getSameOpcode(VL, *TLI);
12921- // Special processing for GEPs bundle, which may include non-gep values.
12922- if (!S && VL.front()->getType()->isPointerTy()) {
12923- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
12924- if (It != VL.end())
12925- S = getSameOpcode(*It, *TLI);
12926- }
12927- if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx, VL, S))
12928- return VE;
12929- if (S || !isConstant(VL.front())) {
12930- for (const TreeEntry *VE :
12931- ValueToGatherNodes.lookup(S ? S.getMainOp() : VL.front()))
12932- if (VE->UserTreeIndex.EdgeIdx == Idx && VE->UserTreeIndex.UserTE == E) {
12933- assert(VE->isSame(VL) && "Expected gather node with same values.");
12934- return VE;
12935- }
12936- }
12937- const auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
12938- [&](const std::unique_ptr<TreeEntry> &TE) {
12939- return (TE->isGather() ||
12940- TE->State == TreeEntry::SplitVectorize) &&
12941- TE->UserTreeIndex.EdgeIdx == Idx &&
12942- TE->UserTreeIndex.UserTE == E;
12943- });
12944- assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
12945- return It->get();
12870+ TreeEntry *Op = OperandsToTreeEntry.at({E, Idx});
12871+ assert(Op->isSame(E->getOperand(Idx)) && "Operands mismatch!");
12872+ return Op;
1294612873}
1294712874
1294812875TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
@@ -16914,121 +16841,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1691416841 }
1691516842};
1691616843
16917- BoUpSLP::TreeEntry *
16918- BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
16919- ArrayRef<Value *> VL,
16920- const InstructionsState &S) {
16921- if (!S)
16922- return nullptr;
16923- for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
16924- if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
16925- TE->isSame(VL))
16926- return TE;
16927- return nullptr;
16928- }
16929-
1693016844Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
16931- ValueList &VL = E->getOperand(NodeIdx);
16932- InstructionsState S = getSameOpcode(VL, *TLI);
16933- // Special processing for GEPs bundle, which may include non-gep values.
16934- if (!S && VL.front()->getType()->isPointerTy()) {
16935- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
16936- if (It != VL.end())
16937- S = getSameOpcode(*It, *TLI);
16938- }
16939- const unsigned VF = VL.size();
16940- if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx, VL, S)) {
16941- auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
16942- // V may be affected by MinBWs.
16943- // We want ShuffleInstructionBuilder to correctly support REVEC. The key
16944- // factor is the number of elements, not their type.
16945- Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
16946- unsigned NumElements = getNumElements(VL.front()->getType());
16947- ShuffleInstructionBuilder ShuffleBuilder(
16948- NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
16949- : ScalarTy,
16950- Builder, *this);
16951- ShuffleBuilder.add(V, Mask);
16952- SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
16953- E->CombinedEntriesWithIndices.size());
16954- transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
16955- [&](const auto &P) {
16956- return std::make_pair(VectorizableTree[P.first].get(),
16957- P.second);
16958- });
16959- assert((E->CombinedEntriesWithIndices.empty() ||
16960- E->ReorderIndices.empty()) &&
16961- "Expected either combined subnodes or reordering");
16962- return ShuffleBuilder.finalize({}, SubVectors, {});
16963- };
16964- Value *V = vectorizeTree(VE);
16965- if (VF * getNumElements(VL[0]->getType()) !=
16966- cast<FixedVectorType>(V->getType())->getNumElements()) {
16967- if (!VE->ReuseShuffleIndices.empty()) {
16968- // Reshuffle to get only unique values.
16969- // If some of the scalars are duplicated in the vectorization
16970- // tree entry, we do not vectorize them but instead generate a
16971- // mask for the reuses. But if there are several users of the
16972- // same entry, they may have different vectorization factors.
16973- // This is especially important for PHI nodes. In this case, we
16974- // need to adapt the resulting instruction for the user
16975- // vectorization factor and have to reshuffle it again to take
16976- // only unique elements of the vector. Without this code the
16977- // function incorrectly returns reduced vector instruction with
16978- // the same elements, not with the unique ones.
16979-
16980- // block:
16981- // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
16982- // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
16983- // ... (use %2)
16984- // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
16985- // br %block
16986- SmallVector<int> Mask(VF, PoisonMaskElem);
16987- for (auto [I, V] : enumerate(VL)) {
16988- if (isa<PoisonValue>(V))
16989- continue;
16990- Mask[I] = VE->findLaneForValue(V);
16991- }
16992- V = FinalShuffle(V, Mask);
16993- } else {
16994- assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
16995- "Expected vectorization factor less "
16996- "than original vector size.");
16997- SmallVector<int> UniformMask(VF, 0);
16998- std::iota(UniformMask.begin(), UniformMask.end(), 0);
16999- V = FinalShuffle(V, UniformMask);
17000- }
17001- }
17002- // Need to update the operand gather node, if actually the operand is not a
17003- // vectorized node, but the buildvector/gather node, which matches one of
17004- // the vectorized nodes.
17005- if (VE->UserTreeIndex.UserTE != E || VE->UserTreeIndex.EdgeIdx != NodeIdx) {
17006- auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17007- [&](const std::unique_ptr<TreeEntry> &TE) {
17008- return TE->isGather() &&
17009- TE->UserTreeIndex.UserTE == E &&
17010- TE->UserTreeIndex.EdgeIdx == NodeIdx;
17011- });
17012- assert(It != VectorizableTree.end() && "Expected gather node operand.");
17013- (*It)->VectorizedValue = V;
17014- }
17015- return V;
17016- }
17017-
17018- // Find the corresponding gather entry and vectorize it.
17019- // Allows to be more accurate with tree/graph transformations, checks for the
17020- // correctness of the transformations in many cases.
17021- auto *I = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17022- [E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
17023- return TE->isOperandGatherNode({E, NodeIdx}) ||
17024- (TE->State == TreeEntry::SplitVectorize &&
17025- TE->UserTreeIndex == EdgeInfo(E, NodeIdx));
17026- });
17027- assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
17028- assert(I->get()->UserTreeIndex &&
17029- "Expected only single user for the gather node.");
17030- assert(I->get()->isSame(VL) && "Expected same list of scalars.");
17031- return vectorizeTree(I->get());
16845+ return vectorizeTree(getOperandEntry(E, NodeIdx));
1703216846}
1703316847
1703416848template <typename BVTy, typename ResTy, typename... Args>
0 commit comments