Skip to content

Commit b3731b3

Browse files
Prince781tru
authored andcommitted
[DAGCombiner] cache negative result from getMergeStoreCandidates() (#106949)
Cache negative search result from getStoreMergeCandidates() so that mergeConsecutiveStores() does not iterate quadratically over a potentially long sequence of unmergeable stores. (cherry picked from commit 8f77d37)
1 parent 149bfdd commit b3731b3

File tree

1 file changed

+51
-32
lines changed

1 file changed

+51
-32
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,11 @@ namespace {
191191
// AA - Used for DAG load/store alias analysis.
192192
AliasAnalysis *AA;
193193

194+
/// This caches all chains that have already been processed in
195+
/// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
196+
/// stores candidates.
197+
SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
198+
194199
/// When an instruction is simplified, add all users of the instruction to
195200
/// the work lists because they might get more simplified now.
196201
void AddUsersToWorklist(SDNode *N) {
@@ -776,11 +781,10 @@ namespace {
776781
bool UseTrunc);
777782

778783
/// This is a helper function for mergeConsecutiveStores. Stores that
779-
/// potentially may be merged with St are placed in StoreNodes. RootNode is
780-
/// a chain predecessor to all store candidates.
781-
void getStoreMergeCandidates(StoreSDNode *St,
782-
SmallVectorImpl<MemOpLink> &StoreNodes,
783-
SDNode *&Root);
784+
/// potentially may be merged with St are placed in StoreNodes. On success,
785+
/// returns a chain predecessor to all store candidates.
786+
SDNode *getStoreMergeCandidates(StoreSDNode *St,
787+
SmallVectorImpl<MemOpLink> &StoreNodes);
784788

785789
/// Helper function for mergeConsecutiveStores. Checks if candidate stores
786790
/// have indirect dependency through their operands. RootNode is the
@@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
17821786

17831787
++NodesCombined;
17841788

1789+
// Invalidate cached info.
1790+
ChainsWithoutMergeableStores.clear();
1791+
17851792
// If we get back the same node we passed in, rather than a new node or
17861793
// zero, we know that the node must have defined multiple values and
17871794
// CombineTo was used. Since CombineTo takes care of the worklist
@@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
2037220379
return true;
2037320380
}
2037420381

20375-
void DAGCombiner::getStoreMergeCandidates(
20376-
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20377-
SDNode *&RootNode) {
20382+
SDNode *
20383+
DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
20384+
SmallVectorImpl<MemOpLink> &StoreNodes) {
2037820385
// This holds the base pointer, index, and the offset in bytes from the base
2037920386
// pointer. We must have a base and an offset. Do not handle stores to undef
2038020387
// base pointers.
2038120388
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
2038220389
if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20383-
return;
20390+
return nullptr;
2038420391

2038520392
SDValue Val = peekThroughBitcasts(St->getValue());
2038620393
StoreSource StoreSrc = getStoreSource(Val);
@@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates(
2039620403
LoadVT = Ld->getMemoryVT();
2039720404
// Load and store should be the same type.
2039820405
if (MemVT != LoadVT)
20399-
return;
20406+
return nullptr;
2040020407
// Loads must only have one use.
2040120408
if (!Ld->hasNUsesOfValue(1, 0))
20402-
return;
20409+
return nullptr;
2040320410
// The memory operands must not be volatile/indexed/atomic.
2040420411
// TODO: May be able to relax for unordered atomics (see D66309)
2040520412
if (!Ld->isSimple() || Ld->isIndexed())
20406-
return;
20413+
return nullptr;
2040720414
}
2040820415
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
2040920416
int64_t &Offset) -> bool {
@@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates(
2047120478
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
2047220479
};
2047320480

20481+
// We are looking for a root node which is an ancestor to all mergable
20482+
// stores. We search up through a load, to our root and then down
20483+
// through all children. For instance we will find Store{1,2,3} if
20484+
// St is Store1, Store2. or Store3 where the root is not a load
20485+
// which always true for nonvolatile ops. TODO: Expand
20486+
// the search to find all valid candidates through multiple layers of loads.
20487+
//
20488+
// Root
20489+
// |-------|-------|
20490+
// Load Load Store3
20491+
// | |
20492+
// Store1 Store2
20493+
//
20494+
// FIXME: We should be able to climb and
20495+
// descend TokenFactors to find candidates as well.
20496+
20497+
SDNode *RootNode = St->getChain().getNode();
20498+
// Bail out if we already analyzed this root node and found nothing.
20499+
if (ChainsWithoutMergeableStores.contains(RootNode))
20500+
return nullptr;
20501+
2047420502
// Check if the pair of StoreNode and the RootNode already bail out many
2047520503
// times which is over the limit in dependence check.
2047620504
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
@@ -20494,28 +20522,13 @@ void DAGCombiner::getStoreMergeCandidates(
2049420522
}
2049520523
};
2049620524

20497-
// We looking for a root node which is an ancestor to all mergable
20498-
// stores. We search up through a load, to our root and then down
20499-
// through all children. For instance we will find Store{1,2,3} if
20500-
// St is Store1, Store2. or Store3 where the root is not a load
20501-
// which always true for nonvolatile ops. TODO: Expand
20502-
// the search to find all valid candidates through multiple layers of loads.
20503-
//
20504-
// Root
20505-
// |-------|-------|
20506-
// Load Load Store3
20507-
// | |
20508-
// Store1 Store2
20509-
//
20510-
// FIXME: We should be able to climb and
20511-
// descend TokenFactors to find candidates as well.
20512-
20513-
RootNode = St->getChain().getNode();
20514-
2051520525
unsigned NumNodesExplored = 0;
2051620526
const unsigned MaxSearchNodes = 1024;
2051720527
if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
2051820528
RootNode = Ldn->getChain().getNode();
20529+
// Bail out if we already analyzed this root node and found nothing.
20530+
if (ChainsWithoutMergeableStores.contains(RootNode))
20531+
return nullptr;
2051920532
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
2052020533
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
2052120534
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
@@ -20532,6 +20545,8 @@ void DAGCombiner::getStoreMergeCandidates(
2053220545
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
2053320546
TryToAddCandidate(I);
2053420547
}
20548+
20549+
return RootNode;
2053520550
}
2053620551

2053720552
// We need to check that merging these stores does not cause a loop in the
@@ -21162,9 +21177,8 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
2116221177
return false;
2116321178

2116421179
SmallVector<MemOpLink, 8> StoreNodes;
21165-
SDNode *RootNode;
2116621180
// Find potential store merge candidates by searching through chain sub-DAG
21167-
getStoreMergeCandidates(St, StoreNodes, RootNode);
21181+
SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
2116821182

2116921183
// Check if there is anything to merge.
2117021184
if (StoreNodes.size() < 2)
@@ -21220,6 +21234,11 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
2122021234
llvm_unreachable("Unhandled store source type");
2122121235
}
2122221236
}
21237+
21238+
// Remember if we failed to optimize, to save compile time.
21239+
if (!MadeChange)
21240+
ChainsWithoutMergeableStores.insert(RootNode);
21241+
2122321242
return MadeChange;
2122421243
}
2122521244

0 commit comments

Comments
 (0)