[SLP]Exclude non-profitable subtrees. #162018
Conversation
Created using spr 1.3.7
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v

Author: Alexey Bataev (alexey-bataev)

Changes

Initial support for SLP tree throttling. Trims non-profitable subtrees, trying to maximize perf gains.

Does not support trees with gathered loads yet, since they are not quite trees, but graphs. Analysis should be added later.

Patch is 179.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162018.diff

32 Files Affected:
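Before the diff itself, it may help to see the throttling idea in isolation. The following is a minimal standalone sketch, not the patch's code: plain STL containers stand in for the LLVM ADTs, the Node/trimUnprofitable names and integer costs are invented for illustration, and the real heuristics (reorder/reuse masks, copyable elements, the narrow-store and buildvector bail-outs) are omitted.

#include <cstdio>
#include <queue>
#include <unordered_set>
#include <vector>

// Hypothetical stand-in for an SLP tree entry: each node knows its own
// vectorization cost, its user node (UserTE in the patch), and a
// conservative estimate of gathering its scalars instead.
struct Node {
  int Idx;
  int Parent;     // -1 for the tree root
  int OwnCost;    // what getEntryCost() would return (negative = profit)
  int GatherCost; // conservative buildvector estimate for this node
};

// Returns the indices of nodes to convert back to gathers. Subtree costs
// are accumulated bottom-up, then subtrees are visited from most to least
// expensive and trimmed when vectorizing costs more than gathering.
std::unordered_set<int> trimUnprofitable(const std::vector<Node> &Tree) {
  std::vector<int> SubtreeCost(Tree.size(), 0);
  std::vector<std::vector<int>> SubtreeNodes(Tree.size());
  for (const Node &N : Tree) {
    SubtreeCost[N.Idx] += N.OwnCost;
    // Propagate this node's cost to every transitive user, as the patch
    // does by walking UserTreeIndex.UserTE.
    for (int P = N.Parent; P != -1; P = Tree[P].Parent) {
      SubtreeCost[P] += N.OwnCost;
      SubtreeNodes[P].push_back(N.Idx);
    }
  }
  // Max-heap: most expensive subtree first.
  auto Cmp = [&](int L, int R) { return SubtreeCost[L] < SubtreeCost[R]; };
  std::priority_queue<int, std::vector<int>, decltype(Cmp)> Worklist(Cmp);
  for (const Node &N : Tree)
    Worklist.push(N.Idx);

  std::unordered_set<int> Deleted;
  while (!Worklist.empty() && SubtreeCost[Worklist.top()] > 0) {
    int I = Worklist.top();
    Worklist.pop();
    // Never trim the root of the whole tree or an already-trimmed node.
    if (I == 0 || Deleted.count(I))
      continue;
    // Trim if gathering this subtree's root scalars is cheaper than
    // vectorizing everything hanging off it.
    if (SubtreeCost[I] > Tree[I].GatherCost) {
      Deleted.insert(I);
      for (int Child : SubtreeNodes[I])
        Deleted.insert(Child);
    }
  }
  return Deleted;
}

int main() {
  // Node 1's subtree (nodes 1-3) costs 3 + 2 + 2 = 7 but can be gathered
  // for 3, so it is trimmed; the profitable node 4 survives.
  std::vector<Node> Tree = {{0, -1, -2, 0},
                            {1, 0, 3, 3},
                            {2, 1, 2, 4},
                            {3, 1, 2, 4},
                            {4, 0, -1, 2}};
  for (int I : trimUnprofitable(Tree))
    std::printf("trimmed node %d\n", I);
}

The shape matches calculateTreeCostAndTrimNonProfitable() in the diff below: per-node costs are propagated up the user chain, and a priority queue visits the most expensive subtrees first so the largest losses are trimmed before their children are reconsidered.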
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fedca65d241e8..b633dd4d9fdb0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2005,9 +2005,15 @@ class BoUpSLP {
/// holding live values over call sites.
InstructionCost getSpillCost();
+ /// Calculates the cost of the subtrees, trims non-profitable ones and returns
+ /// final cost.
+ InstructionCost
+ calculateTreeCostAndTrimNonProfitable(ArrayRef<Value *> VectorizedVals = {});
+
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
- InstructionCost getTreeCost(ArrayRef<Value *> VectorizedVals = {},
+ InstructionCost getTreeCost(InstructionCost TreeCost,
+ ArrayRef<Value *> VectorizedVals = {},
InstructionCost ReductionCost = TTI::TCC_Free);
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
@@ -2080,6 +2086,8 @@ class BoUpSLP {
void deleteTree() {
VectorizableTree.clear();
ScalarToTreeEntries.clear();
+ DeletedNodes.clear();
+ TransformedToGatherNodes.clear();
OperandsToTreeEntry.clear();
ScalarsInSplitNodes.clear();
MustGather.clear();
@@ -4511,6 +4519,13 @@ class BoUpSLP {
/// Maps a specific scalar to its tree entry(ies).
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
+ /// List of deleted non-profitable nodes.
+ SmallPtrSet<const TreeEntry *, 8> DeletedNodes;
+
+ /// List of nodes, transformed to gathered, with their conservative
+ /// gather/buildvector cost estimation.
+ SmallDenseMap<const TreeEntry *, InstructionCost> TransformedToGatherNodes;
+
/// Maps the operand index and entry to the corresponding tree entry.
SmallDenseMap<std::pair<const TreeEntry *, unsigned>, TreeEntry *>
OperandsToTreeEntry;
@@ -8697,7 +8712,9 @@ void BoUpSLP::buildExternalUses(
TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
- if (Entry->isGather() || Entry->State == TreeEntry::SplitVectorize)
+ if (Entry->isGather() || Entry->State == TreeEntry::SplitVectorize ||
+ DeletedNodes.contains(Entry) ||
+ TransformedToGatherNodes.contains(Entry))
continue;
// For each lane:
@@ -8744,7 +8761,11 @@ void BoUpSLP::buildExternalUses(
// Skip in-tree scalars that become vectors
if (ArrayRef<TreeEntry *> UseEntries = getTreeEntries(U);
- !UseEntries.empty()) {
+ !UseEntries.empty() &&
+ any_of(UseEntries, [this](const TreeEntry *UseEntry) {
+ return !DeletedNodes.contains(UseEntry) &&
+ !TransformedToGatherNodes.contains(UseEntry);
+ })) {
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in FoundLane will
// be used.
@@ -8752,6 +8773,9 @@ void BoUpSLP::buildExternalUses(
isa<LoadInst, StoreInst>(UserInst)) ||
isa<CallInst>(UserInst)) ||
all_of(UseEntries, [&](TreeEntry *UseEntry) {
+ if (DeletedNodes.contains(UseEntry) ||
+ TransformedToGatherNodes.contains(UseEntry))
+ return true;
return UseEntry->State == TreeEntry::ScatterVectorize ||
!doesInTreeUserNeedToExtract(
Scalar, getRootEntryInstruction(*UseEntry), TLI,
@@ -14208,7 +14232,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
unsigned EntryVF = E->getVectorFactor();
auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
- if (E->isGather()) {
+ if (E->isGather() || TransformedToGatherNodes.contains(E)) {
if (allConstant(VL))
return 0;
if (isa<InsertElementInst>(VL[0]))
@@ -15892,26 +15916,16 @@ static T *performExtractsShuffleAction(
return Prev;
}
-namespace {
-/// Data type for handling buildvector sequences with the reused scalars from
-/// other tree entries.
-template <typename T> struct ShuffledInsertData {
- /// List of insertelements to be replaced by shuffles.
- SmallVector<InsertElementInst *> InsertElements;
- /// The parent vectors and shuffle mask for the given list of inserts.
- MapVector<T, SmallVector<int>> ValueMasks;
-};
-} // namespace
-
-InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
- InstructionCost ReductionCost) {
- InstructionCost Cost = ReductionCost;
+InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
+ ArrayRef<Value *> VectorizedVals) {
+ SmallDenseMap<const TreeEntry *, InstructionCost> NodesCosts;
+ SmallPtrSet<Value *, 4> CheckedExtracts;
+ SmallPtrSet<const TreeEntry *, 4> GatheredLoadsNodes;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
-
- SmallPtrSet<Value *, 4> CheckedExtracts;
- for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
- TreeEntry &TE = *VectorizableTree[I];
+ InstructionCost Cost = 0;
+ for (const std::unique_ptr<TreeEntry> &Ptr : VectorizableTree) {
+ TreeEntry &TE = *Ptr;
// No need to count the cost for combined entries, they are combined and
// just skip their cost.
if (TE.State == TreeEntry::CombinedVectorize) {
@@ -15919,6 +15933,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
dbgs() << "SLP: Skipping cost for combined node that starts with "
<< *TE.Scalars[0] << ".\n";
TE.dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
+ NodesCosts.try_emplace(&TE);
continue;
}
if (TE.hasState() &&
@@ -15931,6 +15946,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
LLVM_DEBUG(dbgs() << "SLP: Adding cost 0 for bundle "
<< shortBundleName(TE.Scalars, TE.Idx) << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
+ NodesCosts.try_emplace(&TE);
continue;
}
}
@@ -15942,11 +15958,202 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
InstructionCost C = getEntryCost(&TE, VectorizedVals, CheckedExtracts);
Cost += C;
+ NodesCosts.try_emplace(&TE, C);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle "
<< shortBundleName(TE.Scalars, TE.Idx) << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
+ // Add gathered loads nodes to the set for later processing.
+ if (TE.Idx > 0 && !TE.UserTreeIndex && TE.hasState() &&
+ TE.getOpcode() == Instruction::Load)
+ GatheredLoadsNodes.insert(&TE);
+ }
+ // Bail out if the cost threshold is negative and cost already below it.
+ if (SLPCostThreshold.getNumOccurrences() > 0 && SLPCostThreshold < 0 &&
+ Cost < -SLPCostThreshold)
+ return Cost;
+ // Bail out if gathered load nodes are found.
+ // TODO: add analysis for gathered load to include their cost correctly into
+ // the related subtrees.
+ if (!GatheredLoadsNodes.empty())
+ return Cost;
+ SmallVector<std::pair<InstructionCost, SmallVector<unsigned>>> SubtreeCosts(
+ VectorizableTree.size());
+ for (const std::unique_ptr<TreeEntry> &Ptr : VectorizableTree) {
+ TreeEntry &TE = *Ptr;
+ InstructionCost C = NodesCosts.at(&TE);
+ SubtreeCosts[TE.Idx].first += C;
+ const TreeEntry *UserTE = TE.UserTreeIndex.UserTE;
+ while (UserTE) {
+ SubtreeCosts[UserTE->Idx].first += C;
+ SubtreeCosts[UserTE->Idx].second.push_back(TE.Idx);
+ UserTE = UserTE->UserTreeIndex.UserTE;
+ }
+ }
+ using CostIndicesTy =
+ std::pair<TreeEntry *, std::pair<InstructionCost, SmallVector<unsigned>>>;
+ struct FirstGreater {
+ bool operator()(const CostIndicesTy &LHS, const CostIndicesTy &RHS) const {
+ return LHS.second.first < RHS.second.first ||
+ (LHS.second.first == RHS.second.first &&
+ LHS.first->Idx < RHS.first->Idx);
+ }
+ };
+ PriorityQueue<CostIndicesTy, SmallVector<CostIndicesTy>, FirstGreater>
+ Worklist;
+ for (const auto [Idx, P] : enumerate(SubtreeCosts))
+ Worklist.emplace(VectorizableTree[Idx].get(), P);
+
+ // Narrow store trees with non-profitable immediate values - exit.
+ if (!UserIgnoreList && VectorizableTree.front()->getVectorFactor() < 4 &&
+ VectorizableTree.front()->hasState() &&
+ VectorizableTree.front()->getOpcode() == Instruction::Store &&
+ (Worklist.top().first->Idx == 0 || Worklist.top().first->Idx == 1))
+ return Cost;
+
+ bool Changed = false;
+ while (!Worklist.empty() && Worklist.top().second.first > 0) {
+ TreeEntry *TE = Worklist.top().first;
+ if (TE->isGather() || TE->Idx == 0 || DeletedNodes.contains(TE)) {
+ Worklist.pop();
+ continue;
+ }
+
+ // Calculate the gather cost of the root node.
+ InstructionCost SubtreeCost = Worklist.top().second.first;
+ if (SubtreeCost < TE->Scalars.size()) {
+ Worklist.pop();
+ continue;
+ }
+ if (!TransformedToGatherNodes.empty()) {
+ for (unsigned Idx : Worklist.top().second.second) {
+ auto It = TransformedToGatherNodes.find(VectorizableTree[Idx].get());
+ if (It != TransformedToGatherNodes.end()) {
+ SubtreeCost -= SubtreeCosts[Idx].first;
+ SubtreeCost += It->second;
+ }
+ }
+ }
+ if (SubtreeCost < 0 || SubtreeCost < TE->Scalars.size()) {
+ Worklist.pop();
+ continue;
+ }
+ const unsigned Sz = TE->Scalars.size();
+ APInt DemandedElts = APInt::getAllOnes(Sz);
+ for (auto [Idx, V] : enumerate(TE->Scalars)) {
+ if (isConstant(V))
+ DemandedElts.clearBit(Idx);
+ }
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+ Type *ScalarTy = getValueType(TE->Scalars.front());
+ auto *VecTy = getWidenedType(ScalarTy, Sz);
+ const unsigned EntryVF = TE->getVectorFactor();
+ auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
+ InstructionCost GatherCost = ::getScalarizationOverhead(
+ *TTI, ScalarTy, VecTy, DemandedElts,
+ /*Insert=*/true, /*Extract=*/false, CostKind);
+ SmallVector<int> Mask;
+ if (!TE->ReorderIndices.empty() &&
+ TE->State != TreeEntry::CompressVectorize &&
+ (TE->State != TreeEntry::StridedVectorize ||
+ !isReverseOrder(TE->ReorderIndices))) {
+ SmallVector<int> NewMask;
+ if (TE->getOpcode() == Instruction::Store) {
+ // For stores the order is actually a mask.
+ NewMask.resize(TE->ReorderIndices.size());
+ copy(TE->ReorderIndices, NewMask.begin());
+ } else {
+ inversePermutation(TE->ReorderIndices, NewMask);
+ }
+ ::addMask(Mask, NewMask);
+ }
+ if (!TE->ReuseShuffleIndices.empty())
+ ::addMask(Mask, TE->ReuseShuffleIndices);
+ if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask, EntryVF))
+ GatherCost +=
+ ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
+ // If all scalars are reused in gather node(s) or other vector nodes, there
+ // might be extra cost for inserting them.
+ if (all_of(TE->Scalars, [&](Value *V) {
+ return (TE->hasCopyableElements() && TE->isCopyableElement(V)) ||
+ isConstant(V) || isGathered(V) || getTreeEntries(V).size() > 1;
+ }))
+ GatherCost *= 2;
+ // Erase subtree if it is non-profitable.
+ if (SubtreeCost > GatherCost) {
+ // If the remaining tree is just a buildvector - exit, it will cause
+ // endless attempts to vectorize.
+ if (VectorizableTree.front()->hasState() &&
+ VectorizableTree.front()->getOpcode() == Instruction::InsertElement &&
+ TE->Idx == 1)
+ return InstructionCost::getInvalid();
+
+ LLVM_DEBUG(dbgs() << "SLP: Trimming unprofitable subtree at node "
+ << TE->Idx << " with cost "
+ << Worklist.top().second.first << " and gather cost "
+ << GatherCost << ".\n");
+ if (TE->UserTreeIndex) {
+ TransformedToGatherNodes.try_emplace(TE, GatherCost);
+ NodesCosts.erase(TE);
+ } else {
+ DeletedNodes.insert(TE);
+ TransformedToGatherNodes.erase(TE);
+ NodesCosts.erase(TE);
+ }
+ for (unsigned Idx : Worklist.top().second.second) {
+ TreeEntry &ChildTE = *VectorizableTree[Idx];
+ DeletedNodes.insert(&ChildTE);
+ TransformedToGatherNodes.erase(&ChildTE);
+ NodesCosts.erase(&ChildTE);
+ }
+ Changed = true;
+ }
+ Worklist.pop();
+ }
+ if (!Changed)
+ return SubtreeCosts.front().first;
+
+ for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+ if (DeletedNodes.contains(TE.get()))
+ continue;
+ if (TransformedToGatherNodes.contains(TE.get()) && !TE->UserTreeIndex) {
+ assert(TE->getOpcode() == Instruction::Load && "Expected load only.");
+ continue;
+ }
+ if (!NodesCosts.contains(TE.get())) {
+ InstructionCost C =
+ getEntryCost(TE.get(), VectorizedVals, CheckedExtracts);
+ NodesCosts.try_emplace(TE.get(), C);
+ }
}
+ LLVM_DEBUG(dbgs() << "SLP: Recalculate costs after tree trimming.\n");
+ Cost = 0;
+ for (const auto &P : NodesCosts) {
+ Cost += P.second;
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << P.second << " for bundle "
+ << shortBundleName(P.first->Scalars, P.first->Idx) << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ }
+ return Cost;
+}
+
+namespace {
+/// Data type for handling buildvector sequences with the reused scalars from
+/// other tree entries.
+template <typename T> struct ShuffledInsertData {
+ /// List of insertelements to be replaced by shuffles.
+ SmallVector<InsertElementInst *> InsertElements;
+ /// The parent vectors and shuffle mask for the given list of inserts.
+ MapVector<T, SmallVector<int>> ValueMasks;
+};
+} // namespace
+
+InstructionCost BoUpSLP::getTreeCost(InstructionCost TreeCost,
+ ArrayRef<Value *> VectorizedVals,
+ InstructionCost ReductionCost) {
+ InstructionCost Cost = TreeCost + ReductionCost;
+
if (Cost >= -SLPCostThreshold &&
none_of(ExternalUses, [](const ExternalUser &EU) {
return isa_and_nonnull<InsertElementInst>(EU.User);
@@ -16243,8 +16450,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
for (Value *V : ScalarOpsFromCasts) {
ExternalUsesAsOriginalScalar.insert(V);
if (ArrayRef<TreeEntry *> TEs = getTreeEntries(V); !TEs.empty()) {
- ExternalUses.emplace_back(V, nullptr, *TEs.front(),
- TEs.front()->findLaneForValue(V));
+ const auto *It = find_if_not(TEs, [&](TreeEntry *TE) {
+ return TransformedToGatherNodes.contains(TE) ||
+ DeletedNodes.contains(TE);
+ });
+ if (It != TEs.end()) {
+ const TreeEntry *UserTE = *It;
+ ExternalUses.emplace_back(V, nullptr, *UserTE,
+ UserTE->findLaneForValue(V));
+ }
}
}
// Add reduced value cost, if resized.
@@ -16710,8 +16924,22 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
continue;
// Build a list of tree entries where V is used.
SmallPtrSet<const TreeEntry *, 4> VToTEs;
- for (const TreeEntry *TEPtr : ValueToGatherNodes.lookup(V)) {
- if (TEPtr == TE || TEPtr->Idx == 0)
+ SmallVector<const TreeEntry *> GatherNodes(
+ ValueToGatherNodes.lookup(V).takeVector());
+ if (TransformedToGatherNodes.contains(TE)) {
+ for (TreeEntry *E : getSplitTreeEntries(V)) {
+ if (TE == E || !TransformedToGatherNodes.contains(E))
+ continue;
+ GatherNodes.push_back(E);
+ }
+ for (TreeEntry *E : getTreeEntries(V)) {
+ if (TE == E || !TransformedToGatherNodes.contains(E))
+ continue;
+ GatherNodes.push_back(E);
+ }
+ }
+ for (const TreeEntry *TEPtr : GatherNodes) {
+ if (TEPtr == TE || TEPtr->Idx == 0 || DeletedNodes.contains(TEPtr))
continue;
assert(any_of(TEPtr->Scalars,
[&](Value *V) { return GatheredScalars.contains(V); }) &&
@@ -16787,8 +17015,10 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
VToTEs.insert(TEPtr);
}
if (ArrayRef<TreeEntry *> VTEs = getSplitTreeEntries(V); !VTEs.empty()) {
- const auto *It = find_if(
- VTEs, [&](const TreeEntry *MTE) { return MTE != TEUseEI.UserTE; });
+ const auto *It = find_if(VTEs, [&](const TreeEntry *MTE) {
+ return MTE != TE && MTE != TEUseEI.UserTE &&
+ !DeletedNodes.contains(MTE);
+ });
if (It != VTEs.end()) {
const TreeEntry *VTE = *It;
if (none_of(TE->CombinedEntriesWithIndices,
@@ -16804,28 +17034,34 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
}
}
if (ArrayRef<TreeEntry *> VTEs = getTreeEntries(V); !VTEs.empty()) {
- const TreeEntry *VTE = VTEs.front();
- if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst.value_or(0) &&
- VTEs.size() > 1 && VTE->State != TreeEntry::Vectorize) {
- VTEs = VTEs.drop_front();
- // Iterate through all vectorized nodes.
- const auto *MIt = find_if(VTEs, [](const TreeEntry *MTE) {
- return MTE->State == TreeEntry::Vectorize;
- });
- if (MIt == VTEs.end())
- continue;
- VTE = *MIt;
- }
- if (none_of(TE->CombinedEntriesWithIndices,
- [&](const auto &P) { return P.first == VTE->Idx; })) {
- Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
- if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
- continue;
+ const auto *It = find_if(VTEs, [&, MainTE = TE](const TreeEntry *TE) {
+ return TE != MainTE && !DeletedNodes.contains(TE) &&
+ !TransformedToGatherNodes.contains(TE);
+ });
+ if (It != VTEs.end()) {
+ const TreeEntry *VTE = *It;
+ if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst.value_or(0) &&
+ VTEs.size() > 1 && VTE->State != TreeEntry::Vectorize) {
+ VTEs = VTEs.drop_front();
+ // Iterate through all vectorized nodes.
+ const auto *MIt = find_if(VTEs, [](const TreeEntry *MTE) {
+ return MTE->State == TreeEntry::Vectorize;
+ });
+ if (MIt == VTEs.end())
+ continue;
+ VTE = *MIt;
+ }
+ if (none_of(TE->CombinedEntriesWithIndices,
+ [&](const auto &P) { return P.first == VTE->Idx; })) {
+ Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
+ if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
+ continue;
+ }
+ // The node is reused - exit.
+ if (CheckAndUseSameNode(VTE))
+ break;
+ VToTEs.insert(VTE);
}
- // The node is reused - exit.
- if (CheckAndUseSameNode(VTE))
- break;
- VToTEs.insert(VTE);
}
if (VToTEs.empty())
continue;
@@ -17658,7 +17894,12 @@ Value *BoUpSLP::gather(
CSEBlocks.insert(InsElt->getParent());
// Add to our 'need-to-extract' list.
if (isa<Instruction>(V)) {
- if (ArrayRef<TreeEntry *> Entries = getTreeEntries(V); !Entries.empty()) {
+ ArrayRef<TreeEntry *> Entries = getTreeEntries(V);
+ const auto *It = find_if(Entries, [&](const TreeEntry *E) {
+ return !TransformedToGatherNodes.contains(E) &&
+ !DeletedNodes.contains(E);
+ });
+ if (It != Entries.end()) {
// Find which lane we need to extract.
User *UserOp = nullptr;
if (Scalar != V) {
@@ -17690,8 +17931,8 @@ Value *BoUpSLP::gather(
UserOp = InsElt;
}
if (UserOp) {
- unsigned FoundLane = Entries.front()->findLaneForValue(V);
- ExternalUses.emplace_back(V, UserOp, *Entries.front(), FoundLane);
+ unsigned FoundLane = (*It)->findLaneForValue(V);
+ ExternalUses.emplace_back(V, User...
[truncated]
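The trimming decision hinges on the conservative gather-cost estimate built in the main hunk above (the DemandedElts/::getScalarizationOverhead block, an optional permute for non-identity reorder/reuse masks, and the GatherCost *= 2 for fully reused scalars). Below is a simplified model of just that estimate, assuming flat per-element and per-vector charges in place of the TargetTransformInfo queries; Lane, estimateGatherCost, and the cost constants are hypothetical.

#include <cstdio>
#include <vector>

// Hypothetical per-lane model of the gather-cost estimate in
// calculateTreeCostAndTrimNonProfitable(). The real code queries
// TargetTransformInfo (::getScalarizationOverhead, ::getShuffleCost);
// flat per-element and per-vector charges stand in for those here.
struct Lane {
  bool IsConstant;      // constant lanes cost nothing to materialize
  bool ReusedElsewhere; // scalar also lives in another tree node
};

int estimateGatherCost(const std::vector<Lane> &Lanes, bool NeedsShuffle,
                       int InsertCost, int ShuffleCost) {
  int Cost = 0;
  // Insert every non-constant lane, mirroring the DemandedElts mask the
  // patch builds by clearing bits for isConstant() scalars.
  for (const Lane &L : Lanes)
    if (!L.IsConstant)
      Cost += InsertCost;
  // A non-identity reorder/reuse mask adds one permute of the result.
  if (NeedsShuffle)
    Cost += ShuffleCost;
  // If every scalar is constant, gathered, or reused by another vector
  // node, the patch conservatively doubles the estimate (GatherCost *= 2).
  bool AllReused = true;
  for (const Lane &L : Lanes)
    AllReused = AllReused && (L.IsConstant || L.ReusedElsewhere);
  if (AllReused)
    Cost *= 2;
  return Cost;
}

int main() {
  // Four lanes, one constant, the rest reused elsewhere, plus a permute:
  // (3 inserts * 1 + 2) * 2 = 10.
  std::vector<Lane> Lanes = {{false, true},
                             {true, false},
                             {false, true},
                             {false, true}};
  std::printf("gather cost: %d\n",
              estimateGatherCost(Lanes, /*NeedsShuffle=*/true,
                                 /*InsertCost=*/1, /*ShuffleCost=*/2));
}

A subtree is converted back to gathers only when its accumulated vector cost exceeds this estimate for its root, so a deliberately conservative (high) gather estimate biases the pass toward keeping vectorization.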
✅ With the latest revision this PR passed the C/C++ code formatter.
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll llvm/test/Transforms/SLPVectorizer/AArch64/div.ll llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll llvm/test/Transforms/SLPVectorizer/RISCV/partial-vec-invalid-cost.ll llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll llvm/test/Transforms/SLPVectorizer/X86/original-inst-scheduled-after-copyable.ll llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll llvm/test/Transforms/SLPVectorizer/X86/scalarize-ctlz.ll llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll llvm/test/Transforms/SLPVectorizer/X86/trunced-buildvector-scalar-extended.ll llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll llvm/test/Transforms/SLPVectorizer/gather_extract_from_vectorbuild.ll llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll

The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef and having tests that trigger undefined behavior.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.