@@ -1886,6 +1886,7 @@ class BoUpSLP {
18861886  void deleteTree() {
18871887    VectorizableTree.clear();
18881888    ScalarToTreeEntries.clear();
1889+     OperandsToTreeEntry.clear();
18891890    ScalarsInSplitNodes.clear();
18901891    MustGather.clear();
18911892    NonScheduledFirst.clear();
@@ -3401,54 +3402,23 @@ class BoUpSLP {
34013402      const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
34023403      bool &IsProfitableToDemote, bool IsTruncRoot) const;
34033404
3404-   /// Check if  the operands on the edges \p Edges of the \p UserTE allows 
3405-   /// reordering (i.e. the operands can be reordered because they have only one 
3406-   /// user and reordarable).
3405+   /// Builds the list of reorderable operands on the edges \p Edges of the \p
3406+   /// UserTE, which allow reordering (i.e. the operands can be reordered because
3407+   /// they have only one user and are reorderable).
34073408  /// \param ReorderableGathers List of all gather nodes that require reordering
34083409  /// (e.g., gather of extractelements or partially vectorizable loads).
34093410  /// \param GatherOps List of gather operand nodes for \p UserTE that require
34103411  /// reordering, subset of \p ReorderableGathers.
3411-   bool 
3412-   canReorderOperands( TreeEntry *UserTE,
3413-                       SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3414-                      ArrayRef< TreeEntry *> ReorderableGathers,
3415-                       SmallVectorImpl<TreeEntry *> &GatherOps);
3412+   void buildReorderableOperands( 
3413+        TreeEntry *UserTE,
3414+       SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3415+       const SmallPtrSetImpl<const  TreeEntry *> & ReorderableGathers,
3416+       SmallVectorImpl<TreeEntry *> &GatherOps);
34163417
34173418  /// Checks if the given \p TE is a gather node with clustered reused scalars
34183419  /// and reorders it per given \p Mask.
34193420  void reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const;
34203421
3421-   /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3422-   /// if any. If it is not vectorized (gather node), returns nullptr.
3423-   TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
3424-     ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
3425-     TreeEntry *TE = nullptr;
3426-     const auto *It = find_if(VL, [&](Value *V) {
3427-       if (!isa<Instruction>(V))
3428-         return false;
3429-       for (TreeEntry *E : getTreeEntries(V)) {
3430-         if (E->UserTreeIndex == EdgeInfo(UserTE, OpIdx)) {
3431-           TE = E;
3432-           return true;
3433-         }
3434-       }
3435-       return false;
3436-     });
3437-     if (It != VL.end()) {
3438-       assert(TE->isSame(VL) && "Expected same scalars.");
3439-       return TE;
3440-     }
3441-     return nullptr;
3442-   }
3443- 
3444-   /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3445-   /// if any. If it is not vectorized (gather node), returns nullptr.
3446-   const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
3447-                                         unsigned OpIdx) const {
3448-     return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
3449-         const_cast<TreeEntry *>(UserTE), OpIdx);
3450-   }
3451- 
34523422  /// Checks if all users of \p I are the part of the vectorization tree.
34533423  bool areAllUsersVectorized(
34543424      Instruction *I,
@@ -3509,19 +3479,6 @@ class BoUpSLP {
35093479  /// Vectorize a single entry in the tree.
35103480  Value *vectorizeTree(TreeEntry *E);
35113481
3512-   /// Returns vectorized operand node, that matches the order of the scalars
3513-   /// operand number \p NodeIdx in entry \p E.
3514-   TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3515-                                          ArrayRef<Value *> VL,
3516-                                          const InstructionsState &S);
3517-   const TreeEntry *
3518-   getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3519-                               ArrayRef<Value *> VL,
3520-                               const InstructionsState &S) const {
3521-     return const_cast<BoUpSLP *>(this)->getMatchedVectorizedOperand(E, NodeIdx,
3522-                                                                     VL, S);
3523-   }
3524- 
35253482  /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
35263483  /// \p E.
35273484  Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
@@ -3715,11 +3672,6 @@ class BoUpSLP {
37153672      return IsSame(Scalars, ReuseShuffleIndices);
37163673    }
37173674
3718-     bool isOperandGatherNode(const EdgeInfo &UserEI) const {
3719-       return isGather() && UserTreeIndex.EdgeIdx == UserEI.EdgeIdx &&
3720-              UserTreeIndex.UserTE == UserEI.UserTE;
3721-     }
3722- 
37233675    /// \returns true if current entry has same operands as \p TE.
37243676    bool hasEqualOperands(const TreeEntry &TE) const {
37253677      if (TE.getNumOperands() != getNumOperands())
@@ -4107,6 +4059,9 @@ class BoUpSLP {
41074059    TreeEntry *Last = VectorizableTree.back().get();
41084060    Last->Idx = VectorizableTree.size() - 1;
41094061    Last->State = EntryState;
4062+     if (UserTreeIdx.UserTE)
4063+       OperandsToTreeEntry.try_emplace(
4064+           std::make_pair(UserTreeIdx.UserTE, UserTreeIdx.EdgeIdx), Last);
41104065    // FIXME: Remove once support for ReuseShuffleIndices has been implemented
41114066    // for non-power-of-two vectors.
41124067    assert(
@@ -4298,6 +4253,10 @@ class BoUpSLP {
42984253  /// Maps a specific scalar to its tree entry(ies).
42994254  SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
43004255
4256+   /// Maps a user tree entry and its operand index to the operand's tree entry.
4257+   SmallDenseMap<std::pair<const TreeEntry *, unsigned>, TreeEntry *>
4258+       OperandsToTreeEntry;
4259+ 
43014260  /// Scalars, used in split vectorize nodes.
43024261  SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarsInSplitNodes;
43034262
@@ -7411,11 +7370,11 @@ void BoUpSLP::reorderTopToBottom() {
74117370  }
74127371}
74137372
7414- bool  BoUpSLP::canReorderOperands (
7373+ void  BoUpSLP::buildReorderableOperands (
74157374    TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
7416-     ArrayRef< TreeEntry *> ReorderableGathers,
7375+     const SmallPtrSetImpl<const  TreeEntry *> & ReorderableGathers,
74177376    SmallVectorImpl<TreeEntry *> &GatherOps) {
7418-   for (unsigned I = 0, E =  UserTE->getNumOperands(); I < E; ++I ) {
7377+   for (unsigned I : seq<unsigned>( UserTE->getNumOperands()) ) {
74197378    if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
74207379          return OpData.first == I &&
74217380                 (OpData.second->State == TreeEntry::Vectorize ||
@@ -7424,7 +7383,25 @@ bool BoUpSLP::canReorderOperands(
74247383                  OpData.second->State == TreeEntry::SplitVectorize);
74257384        }))
74267385      continue;
7427-     if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
7386+     // Do not request operands, if they do not exist.
7387+     if (UserTE->hasState()) {
7388+       if (UserTE->getOpcode() == Instruction::ExtractElement ||
7389+           UserTE->getOpcode() == Instruction::ExtractValue)
7390+         continue;
7391+       if (UserTE->getOpcode() == Instruction::InsertElement && I == 0)
7392+         continue;
7393+       if (UserTE->getOpcode() == Instruction::Store &&
7394+           UserTE->State == TreeEntry::Vectorize && I == 1)
7395+         continue;
7396+       if (UserTE->getOpcode() == Instruction::Load &&
7397+           (UserTE->State == TreeEntry::Vectorize ||
7398+            UserTE->State == TreeEntry::StridedVectorize ||
7399+            UserTE->State == TreeEntry::CompressVectorize))
7400+         continue;
7401+     }
7402+     TreeEntry *TE = getOperandEntry(UserTE, I);
7403+     assert(TE && "Expected operand entry.");
7404+     if (!TE->isGather()) {
74287405      // Add the node to the list of the ordered nodes with the identity
74297406      // order.
74307407      Edges.emplace_back(I, TE);
@@ -7433,37 +7410,14 @@ bool BoUpSLP::canReorderOperands(
74337410      // simply add to the list of gathered ops.
74347411      // If there are reused scalars, process this node as a regular vectorize
74357412      // node, just reorder reuses mask.
7436-       if (TE->State != TreeEntry::Vectorize &&
7437-           TE->State != TreeEntry::StridedVectorize &&
7438-           TE->State != TreeEntry::CompressVectorize &&
7439-           TE->State != TreeEntry::SplitVectorize &&
7413+       if (TE->State == TreeEntry::ScatterVectorize &&
74407414          TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
74417415        GatherOps.push_back(TE);
74427416      continue;
74437417    }
7444-     TreeEntry *Gather = nullptr;
7445-     if (count_if(ReorderableGathers,
7446-                  [&Gather, UserTE, I](TreeEntry *TE) {
7447-                    assert(TE->State != TreeEntry::Vectorize &&
7448-                           TE->State != TreeEntry::StridedVectorize &&
7449-                           TE->State != TreeEntry::CompressVectorize &&
7450-                           TE->State != TreeEntry::SplitVectorize &&
7451-                           "Only non-vectorized nodes are expected.");
7452-                    if (TE->UserTreeIndex.UserTE == UserTE &&
7453-                        TE->UserTreeIndex.EdgeIdx == I) {
7454-                      assert(TE->isSame(UserTE->getOperand(I)) &&
7455-                             "Operand entry does not match operands.");
7456-                      Gather = TE;
7457-                      return true;
7458-                    }
7459-                    return false;
7460-                  }) > 1 &&
7461-         !allConstant(UserTE->getOperand(I)))
7462-       return false;
7463-     if (Gather)
7464-       GatherOps.push_back(Gather);
7418+     if (ReorderableGathers.contains(TE))
7419+       GatherOps.push_back(TE);
74657420  }
7466-   return true;
74677421}
74687422
74697423void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
@@ -7479,13 +7433,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
74797433  // Find all reorderable leaf nodes with the given VF.
74807434  // Currently they are vectorized loads, extracts without alternate operands +
74817435  // some gathering of extracts.
7482-   SmallVector< TreeEntry *> NonVectorized;
7436+   SmallPtrSet<const  TreeEntry *, 4 > NonVectorized;
74837437  for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
74847438    if (TE->State != TreeEntry::Vectorize &&
74857439        TE->State != TreeEntry::StridedVectorize &&
74867440        TE->State != TreeEntry::CompressVectorize &&
74877441        TE->State != TreeEntry::SplitVectorize)
7488-       NonVectorized.push_back (TE.get());
7442+       NonVectorized.insert (TE.get());
74897443    if (std::optional<OrdersType> CurrentOrder =
74907444            getReorderingData(*TE, /*TopToBottom=*/false, IgnoreReorder)) {
74917445      Queue.push(TE.get());
@@ -7584,11 +7538,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
75847538      }
75857539      // Collect the reorderable operands and gather operands of the user node.
75867540      SmallVector<TreeEntry *> GatherOps;
7587-       if (!canReorderOperands(Data.first, Data.second, NonVectorized,
7588-                               GatherOps)) {
7589-         Visited.insert_range(llvm::make_second_range(Data.second));
7590-         continue;
7591-       }
7541+       buildReorderableOperands(Data.first, Data.second, NonVectorized,
7542+                                GatherOps);
75927543      // All operands are reordered and used only in this node - propagate the
75937544      // most used order to the user node.
75947545      MapVector<OrdersType, unsigned,
@@ -12916,33 +12867,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1291612867
1291712868const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
1291812869                                                   unsigned Idx) const {
12919-   ArrayRef<Value *> VL = E->getOperand(Idx);
12920-   InstructionsState S = getSameOpcode(VL, *TLI);
12921-   // Special processing for GEPs bundle, which may include non-gep values.
12922-   if (!S && VL.front()->getType()->isPointerTy()) {
12923-     const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
12924-     if (It != VL.end())
12925-       S = getSameOpcode(*It, *TLI);
12926-   }
12927-   if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx, VL, S))
12928-     return VE;
12929-   if (S || !isConstant(VL.front())) {
12930-     for (const TreeEntry *VE :
12931-          ValueToGatherNodes.lookup(S ? S.getMainOp() : VL.front()))
12932-       if (VE->UserTreeIndex.EdgeIdx == Idx && VE->UserTreeIndex.UserTE == E) {
12933-         assert(VE->isSame(VL) && "Expected gather node with same values.");
12934-         return VE;
12935-       }
12936-   }
12937-   const auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
12938-                            [&](const std::unique_ptr<TreeEntry> &TE) {
12939-                              return (TE->isGather() ||
12940-                                      TE->State == TreeEntry::SplitVectorize) &&
12941-                                     TE->UserTreeIndex.EdgeIdx == Idx &&
12942-                                     TE->UserTreeIndex.UserTE == E;
12943-                            });
12944-   assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
12945-   return It->get();
12870+   TreeEntry *Op = OperandsToTreeEntry.at({E, Idx});
12871+   assert(Op->isSame(E->getOperand(Idx)) && "Operands mismatch!");
12872+   return Op;
1294612873}
1294712874
1294812875TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
@@ -16914,121 +16841,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1691416841  }
1691516842};
1691616843
16917- BoUpSLP::TreeEntry *
16918- BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
16919-                                      ArrayRef<Value *> VL,
16920-                                      const InstructionsState &S) {
16921-   if (!S)
16922-     return nullptr;
16923-   for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
16924-     if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
16925-         TE->isSame(VL))
16926-       return TE;
16927-   return nullptr;
16928- }
16929- 
1693016844Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
16931-   ValueList &VL = E->getOperand(NodeIdx);
16932-   InstructionsState S = getSameOpcode(VL, *TLI);
16933-   // Special processing for GEPs bundle, which may include non-gep values.
16934-   if (!S && VL.front()->getType()->isPointerTy()) {
16935-     const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
16936-     if (It != VL.end())
16937-       S = getSameOpcode(*It, *TLI);
16938-   }
16939-   const unsigned VF = VL.size();
16940-   if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx, VL, S)) {
16941-     auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
16942-       // V may be affected by MinBWs.
16943-       // We want ShuffleInstructionBuilder to correctly support REVEC. The key
16944-       // factor is the number of elements, not their type.
16945-       Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
16946-       unsigned NumElements = getNumElements(VL.front()->getType());
16947-       ShuffleInstructionBuilder ShuffleBuilder(
16948-           NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
16949-                            : ScalarTy,
16950-           Builder, *this);
16951-       ShuffleBuilder.add(V, Mask);
16952-       SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
16953-           E->CombinedEntriesWithIndices.size());
16954-       transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
16955-                 [&](const auto &P) {
16956-                   return std::make_pair(VectorizableTree[P.first].get(),
16957-                                         P.second);
16958-                 });
16959-       assert((E->CombinedEntriesWithIndices.empty() ||
16960-               E->ReorderIndices.empty()) &&
16961-              "Expected either combined subnodes or reordering");
16962-       return ShuffleBuilder.finalize({}, SubVectors, {});
16963-     };
16964-     Value *V = vectorizeTree(VE);
16965-     if (VF * getNumElements(VL[0]->getType()) !=
16966-         cast<FixedVectorType>(V->getType())->getNumElements()) {
16967-       if (!VE->ReuseShuffleIndices.empty()) {
16968-         // Reshuffle to get only unique values.
16969-         // If some of the scalars are duplicated in the vectorization
16970-         // tree entry, we do not vectorize them but instead generate a
16971-         // mask for the reuses. But if there are several users of the
16972-         // same entry, they may have different vectorization factors.
16973-         // This is especially important for PHI nodes. In this case, we
16974-         // need to adapt the resulting instruction for the user
16975-         // vectorization factor and have to reshuffle it again to take
16976-         // only unique elements of the vector. Without this code the
16977-         // function incorrectly returns reduced vector instruction with
16978-         // the same elements, not with the unique ones.
16979- 
16980-         // block:
16981-         // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
16982-         // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
16983-         // ... (use %2)
16984-         // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
16985-         // br %block
16986-         SmallVector<int> Mask(VF, PoisonMaskElem);
16987-         for (auto [I, V] : enumerate(VL)) {
16988-           if (isa<PoisonValue>(V))
16989-             continue;
16990-           Mask[I] = VE->findLaneForValue(V);
16991-         }
16992-         V = FinalShuffle(V, Mask);
16993-       } else {
16994-         assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
16995-                "Expected vectorization factor less "
16996-                "than original vector size.");
16997-         SmallVector<int> UniformMask(VF, 0);
16998-         std::iota(UniformMask.begin(), UniformMask.end(), 0);
16999-         V = FinalShuffle(V, UniformMask);
17000-       }
17001-     }
17002-     // Need to update the operand gather node, if actually the operand is not a
17003-     // vectorized node, but the buildvector/gather node, which matches one of
17004-     // the vectorized nodes.
17005-     if (VE->UserTreeIndex.UserTE != E || VE->UserTreeIndex.EdgeIdx != NodeIdx) {
17006-       auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17007-                          [&](const std::unique_ptr<TreeEntry> &TE) {
17008-                            return TE->isGather() &&
17009-                                   TE->UserTreeIndex.UserTE == E &&
17010-                                   TE->UserTreeIndex.EdgeIdx == NodeIdx;
17011-                          });
17012-       assert(It != VectorizableTree.end() && "Expected gather node operand.");
17013-       (*It)->VectorizedValue = V;
17014-     }
17015-     return V;
17016-   }
17017- 
17018-   // Find the corresponding gather entry and vectorize it.
17019-   // Allows to be more accurate with tree/graph transformations, checks for the
17020-   // correctness of the transformations in many cases.
17021-   auto *I = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17022-                     [E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
17023-                       return TE->isOperandGatherNode({E, NodeIdx}) ||
17024-                              (TE->State == TreeEntry::SplitVectorize &&
17025-                               TE->UserTreeIndex == EdgeInfo(E, NodeIdx));
17026-                     });
17027-   assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
17028-   assert(I->get()->UserTreeIndex &&
17029-          "Expected only single user for the gather node.");
17030-   assert(I->get()->isSame(VL) && "Expected same list of scalars.");
17031-   return vectorizeTree(I->get());
16845+   return vectorizeTree(getOperandEntry(E, NodeIdx));
1703216846}
1703316847
1703416848template <typename BVTy, typename ResTy, typename... Args>
0 commit comments