From 57557747521e7753d3911f1201be9ff6afea32e9 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 19 May 2025 14:47:23 +0000 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 284 +++--------------- 1 file changed, 49 insertions(+), 235 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index db4a5713a49a2..5b9ced4561a0c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1886,6 +1886,7 @@ class BoUpSLP { void deleteTree() { VectorizableTree.clear(); ScalarToTreeEntries.clear(); + OperandsToTreeEntry.clear(); ScalarsInSplitNodes.clear(); MustGather.clear(); NonScheduledFirst.clear(); @@ -3401,54 +3402,23 @@ class BoUpSLP { const SmallDenseSet &NodesToKeepBWs, unsigned &MaxDepthLevel, bool &IsProfitableToDemote, bool IsTruncRoot) const; - /// Check if the operands on the edges \p Edges of the \p UserTE allows - /// reordering (i.e. the operands can be reordered because they have only one - /// user and reordarable). + /// Builds the list of reorderable operands on the edges \p Edges of the \p + /// UserTE, which allow reordering (i.e. the operands can be reordered because + /// they have only one user and are reorderable). /// \param ReorderableGathers List of all gather nodes that require reordering /// (e.g., gather of extractlements or partially vectorizable loads). /// \param GatherOps List of gather operand nodes for \p UserTE that require /// reordering, subset of \p NonVectorized. 
- bool - canReorderOperands(TreeEntry *UserTE, - SmallVectorImpl> &Edges, - ArrayRef ReorderableGathers, - SmallVectorImpl &GatherOps); + void buildReorderableOperands( + TreeEntry *UserTE, + SmallVectorImpl> &Edges, + const SmallPtrSetImpl &ReorderableGathers, + SmallVectorImpl &GatherOps); /// Checks if the given \p TE is a gather node with clustered reused scalars /// and reorders it per given \p Mask. void reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const; - /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph, - /// if any. If it is not vectorized (gather node), returns nullptr. - TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) { - ArrayRef VL = UserTE->getOperand(OpIdx); - TreeEntry *TE = nullptr; - const auto *It = find_if(VL, [&](Value *V) { - if (!isa(V)) - return false; - for (TreeEntry *E : getTreeEntries(V)) { - if (E->UserTreeIndex == EdgeInfo(UserTE, OpIdx)) { - TE = E; - return true; - } - } - return false; - }); - if (It != VL.end()) { - assert(TE->isSame(VL) && "Expected same scalars."); - return TE; - } - return nullptr; - } - - /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph, - /// if any. If it is not vectorized (gather node), returns nullptr. - const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE, - unsigned OpIdx) const { - return const_cast(this)->getVectorizedOperand( - const_cast(UserTE), OpIdx); - } - /// Checks if all users of \p I are the part of the vectorization tree. bool areAllUsersVectorized( Instruction *I, @@ -3509,19 +3479,6 @@ class BoUpSLP { /// Vectorize a single entry in the tree. Value *vectorizeTree(TreeEntry *E); - /// Returns vectorized operand node, that matches the order of the scalars - /// operand number \p NodeIdx in entry \p E. 
- TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx, - ArrayRef VL, - const InstructionsState &S); - const TreeEntry * - getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx, - ArrayRef VL, - const InstructionsState &S) const { - return const_cast(this)->getMatchedVectorizedOperand(E, NodeIdx, - VL, S); - } - /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry /// \p E. Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx); @@ -3715,11 +3672,6 @@ class BoUpSLP { return IsSame(Scalars, ReuseShuffleIndices); } - bool isOperandGatherNode(const EdgeInfo &UserEI) const { - return isGather() && UserTreeIndex.EdgeIdx == UserEI.EdgeIdx && - UserTreeIndex.UserTE == UserEI.UserTE; - } - /// \returns true if current entry has same operands as \p TE. bool hasEqualOperands(const TreeEntry &TE) const { if (TE.getNumOperands() != getNumOperands()) @@ -4107,6 +4059,9 @@ class BoUpSLP { TreeEntry *Last = VectorizableTree.back().get(); Last->Idx = VectorizableTree.size() - 1; Last->State = EntryState; + if (UserTreeIdx.UserTE) + OperandsToTreeEntry.try_emplace( + std::make_pair(UserTreeIdx.UserTE, UserTreeIdx.EdgeIdx), Last); // FIXME: Remove once support for ReuseShuffleIndices has been implemented // for non-power-of-two vectors. assert( @@ -4298,6 +4253,10 @@ class BoUpSLP { /// Maps a specific scalar to its tree entry(ies). SmallDenseMap> ScalarToTreeEntries; + /// Maps the operand index and entry to the corresponding tree entry. + SmallDenseMap, TreeEntry *> + OperandsToTreeEntry; + /// Scalars, used in split vectorize nodes. 
SmallDenseMap> ScalarsInSplitNodes; @@ -7411,11 +7370,11 @@ void BoUpSLP::reorderTopToBottom() { } } -bool BoUpSLP::canReorderOperands( +void BoUpSLP::buildReorderableOperands( TreeEntry *UserTE, SmallVectorImpl> &Edges, - ArrayRef ReorderableGathers, + const SmallPtrSetImpl &ReorderableGathers, SmallVectorImpl &GatherOps) { - for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) { + for (unsigned I : seq(UserTE->getNumOperands())) { if (any_of(Edges, [I](const std::pair &OpData) { return OpData.first == I && (OpData.second->State == TreeEntry::Vectorize || @@ -7424,7 +7383,25 @@ bool BoUpSLP::canReorderOperands( OpData.second->State == TreeEntry::SplitVectorize); })) continue; - if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) { + // Do not request operands, if they do not exist. + if (UserTE->hasState()) { + if (UserTE->getOpcode() == Instruction::ExtractElement || + UserTE->getOpcode() == Instruction::ExtractValue) + continue; + if (UserTE->getOpcode() == Instruction::InsertElement && I == 0) + continue; + if (UserTE->getOpcode() == Instruction::Store && + UserTE->State == TreeEntry::Vectorize && I == 1) + continue; + if (UserTE->getOpcode() == Instruction::Load && + (UserTE->State == TreeEntry::Vectorize || + UserTE->State == TreeEntry::StridedVectorize || + UserTE->State == TreeEntry::CompressVectorize)) + continue; + } + TreeEntry *TE = getOperandEntry(UserTE, I); + assert(TE && "Expected operand entry."); + if (!TE->isGather()) { // Add the node to the list of the ordered nodes with the identity // order. Edges.emplace_back(I, TE); @@ -7433,37 +7410,14 @@ bool BoUpSLP::canReorderOperands( // simply add to the list of gathered ops. // If there are reused scalars, process this node as a regular vectorize // node, just reorder reuses mask. 
- if (TE->State != TreeEntry::Vectorize && - TE->State != TreeEntry::StridedVectorize && - TE->State != TreeEntry::CompressVectorize && - TE->State != TreeEntry::SplitVectorize && + if (TE->State == TreeEntry::ScatterVectorize && TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty()) GatherOps.push_back(TE); continue; } - TreeEntry *Gather = nullptr; - if (count_if(ReorderableGathers, - [&Gather, UserTE, I](TreeEntry *TE) { - assert(TE->State != TreeEntry::Vectorize && - TE->State != TreeEntry::StridedVectorize && - TE->State != TreeEntry::CompressVectorize && - TE->State != TreeEntry::SplitVectorize && - "Only non-vectorized nodes are expected."); - if (TE->UserTreeIndex.UserTE == UserTE && - TE->UserTreeIndex.EdgeIdx == I) { - assert(TE->isSame(UserTE->getOperand(I)) && - "Operand entry does not match operands."); - Gather = TE; - return true; - } - return false; - }) > 1 && - !allConstant(UserTE->getOperand(I))) - return false; - if (Gather) - GatherOps.push_back(Gather); + if (ReorderableGathers.contains(TE)) + GatherOps.push_back(TE); } - return true; } void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { @@ -7479,13 +7433,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { // Find all reorderable leaf nodes with the given VF. // Currently the are vectorized loads,extracts without alternate operands + // some gathering of extracts. - SmallVector NonVectorized; + SmallPtrSet NonVectorized; for (const std::unique_ptr &TE : VectorizableTree) { if (TE->State != TreeEntry::Vectorize && TE->State != TreeEntry::StridedVectorize && TE->State != TreeEntry::CompressVectorize && TE->State != TreeEntry::SplitVectorize) - NonVectorized.push_back(TE.get()); + NonVectorized.insert(TE.get()); if (std::optional CurrentOrder = getReorderingData(*TE, /*TopToBottom=*/false, IgnoreReorder)) { Queue.push(TE.get()); @@ -7584,11 +7538,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { } // Check that operands are used only in the User node. 
SmallVector GatherOps; - if (!canReorderOperands(Data.first, Data.second, NonVectorized, - GatherOps)) { - Visited.insert_range(llvm::make_second_range(Data.second)); - continue; - } + buildReorderableOperands(Data.first, Data.second, NonVectorized, + GatherOps); // All operands are reordered and used only in this node - propagate the // most used order to the user node. MapVector VL = E->getOperand(Idx); - InstructionsState S = getSameOpcode(VL, *TLI); - // Special processing for GEPs bundle, which may include non-gep values. - if (!S && VL.front()->getType()->isPointerTy()) { - const auto *It = find_if(VL, IsaPred); - if (It != VL.end()) - S = getSameOpcode(*It, *TLI); - } - if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx, VL, S)) - return VE; - if (S || !isConstant(VL.front())) { - for (const TreeEntry *VE : - ValueToGatherNodes.lookup(S ? S.getMainOp() : VL.front())) - if (VE->UserTreeIndex.EdgeIdx == Idx && VE->UserTreeIndex.UserTE == E) { - assert(VE->isSame(VL) && "Expected gather node with same values."); - return VE; - } - } - const auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1), - [&](const std::unique_ptr &TE) { - return (TE->isGather() || - TE->State == TreeEntry::SplitVectorize) && - TE->UserTreeIndex.EdgeIdx == Idx && - TE->UserTreeIndex.UserTE == E; - }); - assert(It != VectorizableTree.end() && "Expected vectorizable entry."); - return It->get(); + TreeEntry *Op = OperandsToTreeEntry.at({E, Idx}); + assert(Op->isSame(E->getOperand(Idx)) && "Operands mismatch!"); + return Op; } TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const { @@ -16914,121 +16841,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { } }; -BoUpSLP::TreeEntry * -BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx, - ArrayRef VL, - const InstructionsState &S) { - if (!S) - return nullptr; - for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp())) - if 
(TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx && - TE->isSame(VL)) - return TE; - return nullptr; -} - Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) { - ValueList &VL = E->getOperand(NodeIdx); - InstructionsState S = getSameOpcode(VL, *TLI); - // Special processing for GEPs bundle, which may include non-gep values. - if (!S && VL.front()->getType()->isPointerTy()) { - const auto *It = find_if(VL, IsaPred); - if (It != VL.end()) - S = getSameOpcode(*It, *TLI); - } - const unsigned VF = VL.size(); - if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx, VL, S)) { - auto FinalShuffle = [&](Value *V, ArrayRef Mask) { - // V may be affected by MinBWs. - // We want ShuffleInstructionBuilder to correctly support REVEC. The key - // factor is the number of elements, not their type. - Type *ScalarTy = cast(V->getType())->getElementType(); - unsigned NumElements = getNumElements(VL.front()->getType()); - ShuffleInstructionBuilder ShuffleBuilder( - NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements) - : ScalarTy, - Builder, *this); - ShuffleBuilder.add(V, Mask); - SmallVector> SubVectors( - E->CombinedEntriesWithIndices.size()); - transform(E->CombinedEntriesWithIndices, SubVectors.begin(), - [&](const auto &P) { - return std::make_pair(VectorizableTree[P.first].get(), - P.second); - }); - assert((E->CombinedEntriesWithIndices.empty() || - E->ReorderIndices.empty()) && - "Expected either combined subnodes or reordering"); - return ShuffleBuilder.finalize({}, SubVectors, {}); - }; - Value *V = vectorizeTree(VE); - if (VF * getNumElements(VL[0]->getType()) != - cast(V->getType())->getNumElements()) { - if (!VE->ReuseShuffleIndices.empty()) { - // Reshuffle to get only unique values. - // If some of the scalars are duplicated in the vectorization - // tree entry, we do not vectorize them but instead generate a - // mask for the reuses. 
But if there are several users of the - // same entry, they may have different vectorization factors. - // This is especially important for PHI nodes. In this case, we - // need to adapt the resulting instruction for the user - // vectorization factor and have to reshuffle it again to take - // only unique elements of the vector. Without this code the - // function incorrectly returns reduced vector instruction with - // the same elements, not with the unique ones. - - // block: - // %phi = phi <2 x > { .., %entry} {%shuffle, %block} - // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0> - // ... (use %2) - // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0} - // br %block - SmallVector Mask(VF, PoisonMaskElem); - for (auto [I, V] : enumerate(VL)) { - if (isa(V)) - continue; - Mask[I] = VE->findLaneForValue(V); - } - V = FinalShuffle(V, Mask); - } else { - assert(VF < cast(V->getType())->getNumElements() && - "Expected vectorization factor less " - "than original vector size."); - SmallVector UniformMask(VF, 0); - std::iota(UniformMask.begin(), UniformMask.end(), 0); - V = FinalShuffle(V, UniformMask); - } - } - // Need to update the operand gather node, if actually the operand is not a - // vectorized node, but the buildvector/gather node, which matches one of - // the vectorized nodes. - if (VE->UserTreeIndex.UserTE != E || VE->UserTreeIndex.EdgeIdx != NodeIdx) { - auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1), - [&](const std::unique_ptr &TE) { - return TE->isGather() && - TE->UserTreeIndex.UserTE == E && - TE->UserTreeIndex.EdgeIdx == NodeIdx; - }); - assert(It != VectorizableTree.end() && "Expected gather node operand."); - (*It)->VectorizedValue = V; - } - return V; - } - - // Find the corresponding gather entry and vectorize it. - // Allows to be more accurate with tree/graph transformations, checks for the - // correctness of the transformations in many cases. 
- auto *I = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1), - [E, NodeIdx](const std::unique_ptr &TE) { - return TE->isOperandGatherNode({E, NodeIdx}) || - (TE->State == TreeEntry::SplitVectorize && - TE->UserTreeIndex == EdgeInfo(E, NodeIdx)); - }); - assert(I != VectorizableTree.end() && "Gather node is not in the graph."); - assert(I->get()->UserTreeIndex && - "Expected only single user for the gather node."); - assert(I->get()->isSame(VL) && "Expected same list of scalars."); - return vectorizeTree(I->get()); + return vectorizeTree(getOperandEntry(E, NodeIdx)); } template