@@ -191,6 +191,11 @@ namespace {
191191 // AA - Used for DAG load/store alias analysis.
192192 AliasAnalysis *AA;
193193
194+ /// This caches all chains that have already been processed in
195+ /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
196+ /// stores candidates.
197+ SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
198+
194199 /// When an instruction is simplified, add all users of the instruction to
195200 /// the work lists because they might get more simplified now.
196201 void AddUsersToWorklist(SDNode *N) {
@@ -776,11 +781,10 @@ namespace {
776781 bool UseTrunc);
777782
778783 /// This is a helper function for mergeConsecutiveStores. Stores that
779- /// potentially may be merged with St are placed in StoreNodes. RootNode is
780- /// a chain predecessor to all store candidates.
781- void getStoreMergeCandidates(StoreSDNode *St,
782- SmallVectorImpl<MemOpLink> &StoreNodes,
783- SDNode *&Root);
784+ /// potentially may be merged with St are placed in StoreNodes. On success,
785+ /// returns a chain predecessor to all store candidates.
786+ SDNode *getStoreMergeCandidates(StoreSDNode *St,
787+ SmallVectorImpl<MemOpLink> &StoreNodes);
784788
785789 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
786790 /// have indirect dependency through their operands. RootNode is the
@@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
17821786
17831787 ++NodesCombined;
17841788
1789+ // Invalidate cached info.
1790+ ChainsWithoutMergeableStores.clear();
1791+
17851792 // If we get back the same node we passed in, rather than a new node or
17861793 // zero, we know that the node must have defined multiple values and
17871794 // CombineTo was used. Since CombineTo takes care of the worklist
@@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
2037220379 return true;
2037320380}
2037420381
20375- void DAGCombiner::getStoreMergeCandidates(
20376- StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes ,
20377- SDNode *&RootNode ) {
20382+ SDNode *
20383+ DAGCombiner::getStoreMergeCandidates( StoreSDNode *St,
20384+ SmallVectorImpl<MemOpLink> &StoreNodes ) {
2037820385 // This holds the base pointer, index, and the offset in bytes from the base
2037920386 // pointer. We must have a base and an offset. Do not handle stores to undef
2038020387 // base pointers.
2038120388 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
2038220389 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20383- return;
20390+ return nullptr ;
2038420391
2038520392 SDValue Val = peekThroughBitcasts(St->getValue());
2038620393 StoreSource StoreSrc = getStoreSource(Val);
@@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates(
2039620403 LoadVT = Ld->getMemoryVT();
2039720404 // Load and store should be the same type.
2039820405 if (MemVT != LoadVT)
20399- return;
20406+ return nullptr ;
2040020407 // Loads must only have one use.
2040120408 if (!Ld->hasNUsesOfValue(1, 0))
20402- return;
20409+ return nullptr ;
2040320410 // The memory operands must not be volatile/indexed/atomic.
2040420411 // TODO: May be able to relax for unordered atomics (see D66309)
2040520412 if (!Ld->isSimple() || Ld->isIndexed())
20406- return;
20413+ return nullptr ;
2040720414 }
2040820415 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
2040920416 int64_t &Offset) -> bool {
@@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates(
2047120478 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
2047220479 };
2047320480
20481+ // We are looking for a root node which is an ancestor to all mergable
20482+ // stores. We search up through a load, to our root and then down
20483+ // through all children. For instance we will find Store{1,2,3} if
20484+ // St is Store1, Store2. or Store3 where the root is not a load
20485+ // which always true for nonvolatile ops. TODO: Expand
20486+ // the search to find all valid candidates through multiple layers of loads.
20487+ //
20488+ // Root
20489+ // |-------|-------|
20490+ // Load Load Store3
20491+ // | |
20492+ // Store1 Store2
20493+ //
20494+ // FIXME: We should be able to climb and
20495+ // descend TokenFactors to find candidates as well.
20496+
20497+ SDNode *RootNode = St->getChain().getNode();
20498+ // Bail out if we already analyzed this root node and found nothing.
20499+ if (ChainsWithoutMergeableStores.contains(RootNode))
20500+ return nullptr;
20501+
2047420502 // Check if the pair of StoreNode and the RootNode already bail out many
2047520503 // times which is over the limit in dependence check.
2047620504 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
@@ -20494,28 +20522,13 @@ void DAGCombiner::getStoreMergeCandidates(
2049420522 }
2049520523 };
2049620524
20497- // We looking for a root node which is an ancestor to all mergable
20498- // stores. We search up through a load, to our root and then down
20499- // through all children. For instance we will find Store{1,2,3} if
20500- // St is Store1, Store2. or Store3 where the root is not a load
20501- // which always true for nonvolatile ops. TODO: Expand
20502- // the search to find all valid candidates through multiple layers of loads.
20503- //
20504- // Root
20505- // |-------|-------|
20506- // Load Load Store3
20507- // | |
20508- // Store1 Store2
20509- //
20510- // FIXME: We should be able to climb and
20511- // descend TokenFactors to find candidates as well.
20512-
20513- RootNode = St->getChain().getNode();
20514-
2051520525 unsigned NumNodesExplored = 0;
2051620526 const unsigned MaxSearchNodes = 1024;
2051720527 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
2051820528 RootNode = Ldn->getChain().getNode();
20529+ // Bail out if we already analyzed this root node and found nothing.
20530+ if (ChainsWithoutMergeableStores.contains(RootNode))
20531+ return nullptr;
2051920532 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
2052020533 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
2052120534 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
@@ -20532,6 +20545,8 @@ void DAGCombiner::getStoreMergeCandidates(
2053220545 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
2053320546 TryToAddCandidate(I);
2053420547 }
20548+
20549+ return RootNode;
2053520550}
2053620551
2053720552// We need to check that merging these stores does not cause a loop in the
@@ -21162,9 +21177,8 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
2116221177 return false;
2116321178
2116421179 SmallVector<MemOpLink, 8> StoreNodes;
21165- SDNode *RootNode;
2116621180 // Find potential store merge candidates by searching through chain sub-DAG
21167- getStoreMergeCandidates(St, StoreNodes, RootNode );
21181+ SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
2116821182
2116921183 // Check if there is anything to merge.
2117021184 if (StoreNodes.size() < 2)
@@ -21220,6 +21234,11 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
2122021234 llvm_unreachable("Unhandled store source type");
2122121235 }
2122221236 }
21237+
21238+ // Remember if we failed to optimize, to save compile time.
21239+ if (!MadeChange)
21240+ ChainsWithoutMergeableStores.insert(RootNode);
21241+
2122321242 return MadeChange;
2122421243}
2122521244
0 commit comments