
Commit 5755774

[spr] initial version
Created using spr 1.3.5
1 parent c28d6c2 commit 5755774


llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 49 additions & 235 deletions
@@ -1886,6 +1886,7 @@ class BoUpSLP {
   void deleteTree() {
     VectorizableTree.clear();
     ScalarToTreeEntries.clear();
+    OperandsToTreeEntry.clear();
     ScalarsInSplitNodes.clear();
     MustGather.clear();
     NonScheduledFirst.clear();
@@ -3401,54 +3402,23 @@ class BoUpSLP {
       const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
       bool &IsProfitableToDemote, bool IsTruncRoot) const;

-  /// Check if the operands on the edges \p Edges of the \p UserTE allows
-  /// reordering (i.e. the operands can be reordered because they have only one
-  /// user and reordarable).
+  /// Builds the list of reorderable operands on the edges \p Edges of the \p
+  /// UserTE, which allow reordering (i.e. the operands can be reordered because
+  /// they have only one user and reordarable).
   /// \param ReorderableGathers List of all gather nodes that require reordering
   /// (e.g., gather of extractlements or partially vectorizable loads).
   /// \param GatherOps List of gather operand nodes for \p UserTE that require
   /// reordering, subset of \p NonVectorized.
-  bool
-  canReorderOperands(TreeEntry *UserTE,
-                     SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
-                     ArrayRef<TreeEntry *> ReorderableGathers,
-                     SmallVectorImpl<TreeEntry *> &GatherOps);
+  void buildReorderableOperands(
+      TreeEntry *UserTE,
+      SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
+      const SmallPtrSetImpl<const TreeEntry *> &ReorderableGathers,
+      SmallVectorImpl<TreeEntry *> &GatherOps);

   /// Checks if the given \p TE is a gather node with clustered reused scalars
   /// and reorders it per given \p Mask.
   void reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const;

-  /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
-  /// if any. If it is not vectorized (gather node), returns nullptr.
-  TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
-    ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
-    TreeEntry *TE = nullptr;
-    const auto *It = find_if(VL, [&](Value *V) {
-      if (!isa<Instruction>(V))
-        return false;
-      for (TreeEntry *E : getTreeEntries(V)) {
-        if (E->UserTreeIndex == EdgeInfo(UserTE, OpIdx)) {
-          TE = E;
-          return true;
-        }
-      }
-      return false;
-    });
-    if (It != VL.end()) {
-      assert(TE->isSame(VL) && "Expected same scalars.");
-      return TE;
-    }
-    return nullptr;
-  }
-
-  /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
-  /// if any. If it is not vectorized (gather node), returns nullptr.
-  const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
-                                        unsigned OpIdx) const {
-    return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
-        const_cast<TreeEntry *>(UserTE), OpIdx);
-  }
-
   /// Checks if all users of \p I are the part of the vectorization tree.
   bool areAllUsersVectorized(
       Instruction *I,
@@ -3509,19 +3479,6 @@ class BoUpSLP {
   /// Vectorize a single entry in the tree.
   Value *vectorizeTree(TreeEntry *E);

-  /// Returns vectorized operand node, that matches the order of the scalars
-  /// operand number \p NodeIdx in entry \p E.
-  TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
-                                         ArrayRef<Value *> VL,
-                                         const InstructionsState &S);
-  const TreeEntry *
-  getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
-                              ArrayRef<Value *> VL,
-                              const InstructionsState &S) const {
-    return const_cast<BoUpSLP *>(this)->getMatchedVectorizedOperand(E, NodeIdx,
-                                                                    VL, S);
-  }
-
   /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
   /// \p E.
   Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
@@ -3715,11 +3672,6 @@ class BoUpSLP {
       return IsSame(Scalars, ReuseShuffleIndices);
     }

-    bool isOperandGatherNode(const EdgeInfo &UserEI) const {
-      return isGather() && UserTreeIndex.EdgeIdx == UserEI.EdgeIdx &&
-             UserTreeIndex.UserTE == UserEI.UserTE;
-    }
-
     /// \returns true if current entry has same operands as \p TE.
     bool hasEqualOperands(const TreeEntry &TE) const {
       if (TE.getNumOperands() != getNumOperands())
@@ -4107,6 +4059,9 @@ class BoUpSLP {
     TreeEntry *Last = VectorizableTree.back().get();
     Last->Idx = VectorizableTree.size() - 1;
     Last->State = EntryState;
+    if (UserTreeIdx.UserTE)
+      OperandsToTreeEntry.try_emplace(
+          std::make_pair(UserTreeIdx.UserTE, UserTreeIdx.EdgeIdx), Last);
     // FIXME: Remove once support for ReuseShuffleIndices has been implemented
     // for non-power-of-two vectors.
     assert(
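
Note: the hunk above records the new (user entry, operand index) → operand entry link as soon as `newTreeEntry()` creates a node; the `OperandsToTreeEntry` member it writes to is declared in the next hunk, and `getOperandEntry()` later resolves an operand edge with a single map lookup instead of scanning the tree. A minimal standalone analogue of that shape, with illustrative `Node`/`Graph` types rather than the real `TreeEntry` machinery:

```cpp
#include <cassert>
#include <map>
#include <utility>
#include <vector>

// Illustrative stand-ins only; TreeEntry/EdgeInfo in SLPVectorizer.cpp carry
// far more state. The point is the shape of the cache, not the node contents.
struct Node {
  unsigned Idx = 0;
};

struct Graph {
  std::vector<Node> Nodes; // owns the entries
  std::map<std::pair<const Node *, unsigned>, Node *> OperandsToEntry;

  // Analogue of newTreeEntry(): record the (user, edge index) -> entry link
  // at the moment the operand node is created.
  Node *newEntry(const Node *User, unsigned EdgeIdx) {
    Nodes.push_back(Node{static_cast<unsigned>(Nodes.size())});
    Node *Last = &Nodes.back();
    if (User)
      OperandsToEntry.try_emplace(std::make_pair(User, EdgeIdx), Last);
    return Last;
  }

  // Analogue of getOperandEntry(): one lookup, no walk over all nodes.
  Node *getOperandEntry(const Node *User, unsigned EdgeIdx) const {
    return OperandsToEntry.at(std::make_pair(User, EdgeIdx));
  }
};

int main() {
  Graph G;
  G.Nodes.reserve(8); // keep pointers stable for this small example
  Node *Root = G.newEntry(/*User=*/nullptr, /*EdgeIdx=*/0);
  Node *Op1 = G.newEntry(Root, 1);
  assert(G.getOperandEntry(Root, 1) == Op1);
  (void)Op1;
  return 0;
}
```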
@@ -4298,6 +4253,10 @@ class BoUpSLP {
   /// Maps a specific scalar to its tree entry(ies).
   SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;

+  /// Maps the operand index and entry to the corresponding tree entry.
+  SmallDenseMap<std::pair<const TreeEntry *, unsigned>, TreeEntry *>
+      OperandsToTreeEntry;
+
   /// Scalars, used in split vectorize nodes.
   SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarsInSplitNodes;

@@ -7411,11 +7370,11 @@ void BoUpSLP::reorderTopToBottom() {
   }
 }

-bool BoUpSLP::canReorderOperands(
+void BoUpSLP::buildReorderableOperands(
     TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
-    ArrayRef<TreeEntry *> ReorderableGathers,
+    const SmallPtrSetImpl<const TreeEntry *> &ReorderableGathers,
     SmallVectorImpl<TreeEntry *> &GatherOps) {
-  for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
+  for (unsigned I : seq<unsigned>(UserTE->getNumOperands())) {
     if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
           return OpData.first == I &&
                  (OpData.second->State == TreeEntry::Vectorize ||
@@ -7424,7 +7383,25 @@ bool BoUpSLP::canReorderOperands(
                   OpData.second->State == TreeEntry::SplitVectorize);
         }))
       continue;
-    if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
+    // Do not request operands, if they do not exist.
+    if (UserTE->hasState()) {
+      if (UserTE->getOpcode() == Instruction::ExtractElement ||
+          UserTE->getOpcode() == Instruction::ExtractValue)
+        continue;
+      if (UserTE->getOpcode() == Instruction::InsertElement && I == 0)
+        continue;
+      if (UserTE->getOpcode() == Instruction::Store &&
+          UserTE->State == TreeEntry::Vectorize && I == 1)
+        continue;
+      if (UserTE->getOpcode() == Instruction::Load &&
+          (UserTE->State == TreeEntry::Vectorize ||
+           UserTE->State == TreeEntry::StridedVectorize ||
+           UserTE->State == TreeEntry::CompressVectorize))
+        continue;
+    }
+    TreeEntry *TE = getOperandEntry(UserTE, I);
+    assert(TE && "Expected operand entry.");
+    if (!TE->isGather()) {
       // Add the node to the list of the ordered nodes with the identity
       // order.
       Edges.emplace_back(I, TE);
@@ -7433,37 +7410,14 @@ bool BoUpSLP::canReorderOperands(
       // simply add to the list of gathered ops.
       // If there are reused scalars, process this node as a regular vectorize
       // node, just reorder reuses mask.
-      if (TE->State != TreeEntry::Vectorize &&
-          TE->State != TreeEntry::StridedVectorize &&
-          TE->State != TreeEntry::CompressVectorize &&
-          TE->State != TreeEntry::SplitVectorize &&
+      if (TE->State == TreeEntry::ScatterVectorize &&
           TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
         GatherOps.push_back(TE);
       continue;
     }
-    TreeEntry *Gather = nullptr;
-    if (count_if(ReorderableGathers,
-                 [&Gather, UserTE, I](TreeEntry *TE) {
-                   assert(TE->State != TreeEntry::Vectorize &&
-                          TE->State != TreeEntry::StridedVectorize &&
-                          TE->State != TreeEntry::CompressVectorize &&
-                          TE->State != TreeEntry::SplitVectorize &&
-                          "Only non-vectorized nodes are expected.");
-                   if (TE->UserTreeIndex.UserTE == UserTE &&
-                       TE->UserTreeIndex.EdgeIdx == I) {
-                     assert(TE->isSame(UserTE->getOperand(I)) &&
-                            "Operand entry does not match operands.");
-                     Gather = TE;
-                     return true;
-                   }
-                   return false;
-                 }) > 1 &&
-        !allConstant(UserTE->getOperand(I)))
-      return false;
-    if (Gather)
-      GatherOps.push_back(Gather);
+    if (ReorderableGathers.contains(TE))
+      GatherOps.push_back(TE);
   }
-  return true;
 }

 void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
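
With every operand reachable through `getOperandEntry()` and the reorderable gather nodes collected into a `SmallPtrSet` up front, the renamed `buildReorderableOperands()` only classifies operands (vectorized edges vs. gathers that still need reordering) and can no longer fail, which is why the `bool` return and the bail-out at the call site disappear. A rough standalone sketch of that classification pattern, using simplified types rather than the actual SLP data structures:

```cpp
#include <unordered_set>
#include <vector>

// Simplified stand-in: each operand already has its own entry, so
// classification is a couple of O(1) tests instead of a count_if() scan
// over every gather node in the tree.
struct Entry {
  bool Gather = false;
};

void buildReorderableOperands(
    const std::vector<Entry *> &Operands,
    const std::unordered_set<const Entry *> &ReorderableGathers,
    std::vector<Entry *> &Edges, std::vector<Entry *> &GatherOps) {
  for (Entry *Op : Operands) {
    if (!Op->Gather) {
      Edges.push_back(Op); // vectorized operand: reorder via its own order
      continue;
    }
    if (ReorderableGathers.count(Op))
      GatherOps.push_back(Op); // gather operand that still needs reordering
  }
}
```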
@@ -7479,13 +7433,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
   // Find all reorderable leaf nodes with the given VF.
   // Currently the are vectorized loads,extracts without alternate operands +
   // some gathering of extracts.
-  SmallVector<TreeEntry *> NonVectorized;
+  SmallPtrSet<const TreeEntry *, 4> NonVectorized;
   for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
     if (TE->State != TreeEntry::Vectorize &&
         TE->State != TreeEntry::StridedVectorize &&
         TE->State != TreeEntry::CompressVectorize &&
         TE->State != TreeEntry::SplitVectorize)
-      NonVectorized.push_back(TE.get());
+      NonVectorized.insert(TE.get());
     if (std::optional<OrdersType> CurrentOrder =
             getReorderingData(*TE, /*TopToBottom=*/false, IgnoreReorder)) {
       Queue.push(TE.get());
@@ -7584,11 +7538,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
       }
       // Check that operands are used only in the User node.
       SmallVector<TreeEntry *> GatherOps;
-      if (!canReorderOperands(Data.first, Data.second, NonVectorized,
-                              GatherOps)) {
-        Visited.insert_range(llvm::make_second_range(Data.second));
-        continue;
-      }
+      buildReorderableOperands(Data.first, Data.second, NonVectorized,
+                               GatherOps);
       // All operands are reordered and used only in this node - propagate the
       // most used order to the user node.
       MapVector<OrdersType, unsigned,
@@ -12916,33 +12867,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {

 const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
                                                    unsigned Idx) const {
-  ArrayRef<Value *> VL = E->getOperand(Idx);
-  InstructionsState S = getSameOpcode(VL, *TLI);
-  // Special processing for GEPs bundle, which may include non-gep values.
-  if (!S && VL.front()->getType()->isPointerTy()) {
-    const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
-    if (It != VL.end())
-      S = getSameOpcode(*It, *TLI);
-  }
-  if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx, VL, S))
-    return VE;
-  if (S || !isConstant(VL.front())) {
-    for (const TreeEntry *VE :
-         ValueToGatherNodes.lookup(S ? S.getMainOp() : VL.front()))
-      if (VE->UserTreeIndex.EdgeIdx == Idx && VE->UserTreeIndex.UserTE == E) {
-        assert(VE->isSame(VL) && "Expected gather node with same values.");
-        return VE;
-      }
-  }
-  const auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
-                           [&](const std::unique_ptr<TreeEntry> &TE) {
-                             return (TE->isGather() ||
-                                     TE->State == TreeEntry::SplitVectorize) &&
-                                    TE->UserTreeIndex.EdgeIdx == Idx &&
-                                    TE->UserTreeIndex.UserTE == E;
-                           });
-  assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
-  return It->get();
+  TreeEntry *Op = OperandsToTreeEntry.at({E, Idx});
+  assert(Op->isSame(E->getOperand(Idx)) && "Operands mismatch!");
+  return Op;
 }

 TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
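
The rewritten `getOperandEntry()` trusts the cache for the lookup itself and keeps only a debug-build cross-check (the "Operands mismatch!" assert) that the cached entry still matches the recomputed operand scalars. A small standalone illustration of that "trust the cache, verify under assert" pattern, with a hypothetical `Entry` type rather than the LLVM one:

```cpp
#include <cassert>
#include <map>
#include <utility>
#include <vector>

// Hypothetical stand-in: the cached entry is returned directly; the assert
// only runs in debug builds and compiles away under NDEBUG.
struct Entry {
  std::vector<int> Scalars;
  bool isSame(const std::vector<int> &VL) const { return Scalars == VL; }
};

using OperandKey = std::pair<const Entry *, unsigned>;

const Entry *getOperandEntry(const std::map<OperandKey, Entry *> &Cache,
                             const Entry *User, unsigned Idx,
                             const std::vector<int> &RecomputedOperand) {
  Entry *Op = Cache.at(std::make_pair(User, Idx));
  assert(Op->isSame(RecomputedOperand) && "Operands mismatch!");
  (void)RecomputedOperand; // silence unused-parameter warnings under NDEBUG
  return Op;
}
```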
@@ -16914,121 +16841,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
   }
 };

-BoUpSLP::TreeEntry *
-BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
-                                     ArrayRef<Value *> VL,
-                                     const InstructionsState &S) {
-  if (!S)
-    return nullptr;
-  for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
-    if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
-        TE->isSame(VL))
-      return TE;
-  return nullptr;
-}
-
 Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
-  ValueList &VL = E->getOperand(NodeIdx);
-  InstructionsState S = getSameOpcode(VL, *TLI);
-  // Special processing for GEPs bundle, which may include non-gep values.
-  if (!S && VL.front()->getType()->isPointerTy()) {
-    const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
-    if (It != VL.end())
-      S = getSameOpcode(*It, *TLI);
-  }
-  const unsigned VF = VL.size();
-  if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx, VL, S)) {
-    auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
-      // V may be affected by MinBWs.
-      // We want ShuffleInstructionBuilder to correctly support REVEC. The key
-      // factor is the number of elements, not their type.
-      Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
-      unsigned NumElements = getNumElements(VL.front()->getType());
-      ShuffleInstructionBuilder ShuffleBuilder(
-          NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
-                           : ScalarTy,
-          Builder, *this);
-      ShuffleBuilder.add(V, Mask);
-      SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
-          E->CombinedEntriesWithIndices.size());
-      transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
-                [&](const auto &P) {
-                  return std::make_pair(VectorizableTree[P.first].get(),
-                                        P.second);
-                });
-      assert((E->CombinedEntriesWithIndices.empty() ||
-              E->ReorderIndices.empty()) &&
-             "Expected either combined subnodes or reordering");
-      return ShuffleBuilder.finalize({}, SubVectors, {});
-    };
-    Value *V = vectorizeTree(VE);
-    if (VF * getNumElements(VL[0]->getType()) !=
-        cast<FixedVectorType>(V->getType())->getNumElements()) {
-      if (!VE->ReuseShuffleIndices.empty()) {
-        // Reshuffle to get only unique values.
-        // If some of the scalars are duplicated in the vectorization
-        // tree entry, we do not vectorize them but instead generate a
-        // mask for the reuses. But if there are several users of the
-        // same entry, they may have different vectorization factors.
-        // This is especially important for PHI nodes. In this case, we
-        // need to adapt the resulting instruction for the user
-        // vectorization factor and have to reshuffle it again to take
-        // only unique elements of the vector. Without this code the
-        // function incorrectly returns reduced vector instruction with
-        // the same elements, not with the unique ones.
-
-        // block:
-        // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
-        // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
-        // ... (use %2)
-        // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
-        // br %block
-        SmallVector<int> Mask(VF, PoisonMaskElem);
-        for (auto [I, V] : enumerate(VL)) {
-          if (isa<PoisonValue>(V))
-            continue;
-          Mask[I] = VE->findLaneForValue(V);
-        }
-        V = FinalShuffle(V, Mask);
-      } else {
-        assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
-               "Expected vectorization factor less "
-               "than original vector size.");
-        SmallVector<int> UniformMask(VF, 0);
-        std::iota(UniformMask.begin(), UniformMask.end(), 0);
-        V = FinalShuffle(V, UniformMask);
-      }
-    }
-    // Need to update the operand gather node, if actually the operand is not a
-    // vectorized node, but the buildvector/gather node, which matches one of
-    // the vectorized nodes.
-    if (VE->UserTreeIndex.UserTE != E || VE->UserTreeIndex.EdgeIdx != NodeIdx) {
-      auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
-                         [&](const std::unique_ptr<TreeEntry> &TE) {
-                           return TE->isGather() &&
-                                  TE->UserTreeIndex.UserTE == E &&
-                                  TE->UserTreeIndex.EdgeIdx == NodeIdx;
-                         });
-      assert(It != VectorizableTree.end() && "Expected gather node operand.");
-      (*It)->VectorizedValue = V;
-    }
-    return V;
-  }
-
-  // Find the corresponding gather entry and vectorize it.
-  // Allows to be more accurate with tree/graph transformations, checks for the
-  // correctness of the transformations in many cases.
-  auto *I = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
-                    [E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
-                      return TE->isOperandGatherNode({E, NodeIdx}) ||
-                             (TE->State == TreeEntry::SplitVectorize &&
-                              TE->UserTreeIndex == EdgeInfo(E, NodeIdx));
-                    });
-  assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
-  assert(I->get()->UserTreeIndex &&
-         "Expected only single user for the gather node.");
-  assert(I->get()->isSame(VL) && "Expected same list of scalars.");
-  return vectorizeTree(I->get());
+  return vectorizeTree(getOperandEntry(E, NodeIdx));
 }

 template <typename BVTy, typename ResTy, typename... Args>
