@@ -191,6 +191,11 @@ namespace {
191
191
// AA - Used for DAG load/store alias analysis.
192
192
AliasAnalysis *AA;
193
193
194
+ /// This caches all chains that have already been processed in
195
+ /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
196
+ /// store candidates.
197
+ SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
198
+
194
199
/// When an instruction is simplified, add all users of the instruction to
195
200
/// the work lists because they might get more simplified now.
196
201
void AddUsersToWorklist(SDNode *N) {
@@ -776,11 +781,10 @@ namespace {
776
781
bool UseTrunc);
777
782
778
783
/// This is a helper function for mergeConsecutiveStores. Stores that
779
- /// potentially may be merged with St are placed in StoreNodes. RootNode is
780
- /// a chain predecessor to all store candidates.
781
- void getStoreMergeCandidates(StoreSDNode *St,
782
- SmallVectorImpl<MemOpLink> &StoreNodes,
783
- SDNode *&Root);
784
+ /// potentially may be merged with St are placed in StoreNodes. On success,
785
+ /// returns a chain predecessor to all store candidates.
786
+ SDNode *getStoreMergeCandidates(StoreSDNode *St,
787
+ SmallVectorImpl<MemOpLink> &StoreNodes);
784
788
785
789
/// Helper function for mergeConsecutiveStores. Checks if candidate stores
786
790
/// have indirect dependency through their operands. RootNode is the
@@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
1782
1786
1783
1787
++NodesCombined;
1784
1788
1789
+ // Invalidate cached info.
1790
+ ChainsWithoutMergeableStores.clear();
1791
+
1785
1792
// If we get back the same node we passed in, rather than a new node or
1786
1793
// zero, we know that the node must have defined multiple values and
1787
1794
// CombineTo was used. Since CombineTo takes care of the worklist
@@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20372
20379
return true;
20373
20380
}
20374
20381
20375
- void DAGCombiner::getStoreMergeCandidates(
20376
- StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes ,
20377
- SDNode *&RootNode ) {
20382
+ SDNode *
20383
+ DAGCombiner::getStoreMergeCandidates( StoreSDNode *St,
20384
+ SmallVectorImpl<MemOpLink> &StoreNodes ) {
20378
20385
// This holds the base pointer, index, and the offset in bytes from the base
20379
20386
// pointer. We must have a base and an offset. Do not handle stores to undef
20380
20387
// base pointers.
20381
20388
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20382
20389
if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20383
- return;
20390
+ return nullptr ;
20384
20391
20385
20392
SDValue Val = peekThroughBitcasts(St->getValue());
20386
20393
StoreSource StoreSrc = getStoreSource(Val);
@@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates(
20396
20403
LoadVT = Ld->getMemoryVT();
20397
20404
// Load and store should be the same type.
20398
20405
if (MemVT != LoadVT)
20399
- return;
20406
+ return nullptr ;
20400
20407
// Loads must only have one use.
20401
20408
if (!Ld->hasNUsesOfValue(1, 0))
20402
- return;
20409
+ return nullptr ;
20403
20410
// The memory operands must not be volatile/indexed/atomic.
20404
20411
// TODO: May be able to relax for unordered atomics (see D66309)
20405
20412
if (!Ld->isSimple() || Ld->isIndexed())
20406
- return;
20413
+ return nullptr ;
20407
20414
}
20408
20415
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20409
20416
int64_t &Offset) -> bool {
@@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates(
20471
20478
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20472
20479
};
20473
20480
20481
+ // We are looking for a root node which is an ancestor to all mergeable
20482
+ // stores. We search up through a load, to our root and then down
20483
+ // through all children. For instance we will find Store{1,2,3} if
20484
+ // St is Store1, Store2, or Store3 where the root is not a load
20485
+ // which is always true for nonvolatile ops. TODO: Expand
20486
+ // the search to find all valid candidates through multiple layers of loads.
20487
+ //
20488
+ // Root
20489
+ // |-------|-------|
20490
+ // Load Load Store3
20491
+ // | |
20492
+ // Store1 Store2
20493
+ //
20494
+ // FIXME: We should be able to climb and
20495
+ // descend TokenFactors to find candidates as well.
20496
+
20497
+ SDNode *RootNode = St->getChain().getNode();
20498
+ // Bail out if we already analyzed this root node and found nothing.
20499
+ if (ChainsWithoutMergeableStores.contains(RootNode))
20500
+ return nullptr;
20501
+
20474
20502
// Check if the pair of StoreNode and the RootNode already bail out many
20475
20503
// times which is over the limit in dependence check.
20476
20504
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
@@ -20494,28 +20522,13 @@ void DAGCombiner::getStoreMergeCandidates(
20494
20522
}
20495
20523
};
20496
20524
20497
- // We looking for a root node which is an ancestor to all mergable
20498
- // stores. We search up through a load, to our root and then down
20499
- // through all children. For instance we will find Store{1,2,3} if
20500
- // St is Store1, Store2. or Store3 where the root is not a load
20501
- // which always true for nonvolatile ops. TODO: Expand
20502
- // the search to find all valid candidates through multiple layers of loads.
20503
- //
20504
- // Root
20505
- // |-------|-------|
20506
- // Load Load Store3
20507
- // | |
20508
- // Store1 Store2
20509
- //
20510
- // FIXME: We should be able to climb and
20511
- // descend TokenFactors to find candidates as well.
20512
-
20513
- RootNode = St->getChain().getNode();
20514
-
20515
20525
unsigned NumNodesExplored = 0;
20516
20526
const unsigned MaxSearchNodes = 1024;
20517
20527
if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20518
20528
RootNode = Ldn->getChain().getNode();
20529
+ // Bail out if we already analyzed this root node and found nothing.
20530
+ if (ChainsWithoutMergeableStores.contains(RootNode))
20531
+ return nullptr;
20519
20532
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20520
20533
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20521
20534
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
@@ -20532,6 +20545,8 @@ void DAGCombiner::getStoreMergeCandidates(
20532
20545
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20533
20546
TryToAddCandidate(I);
20534
20547
}
20548
+
20549
+ return RootNode;
20535
20550
}
20536
20551
20537
20552
// We need to check that merging these stores does not cause a loop in the
@@ -21162,9 +21177,8 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21162
21177
return false;
21163
21178
21164
21179
SmallVector<MemOpLink, 8> StoreNodes;
21165
- SDNode *RootNode;
21166
21180
// Find potential store merge candidates by searching through chain sub-DAG
21167
- getStoreMergeCandidates(St, StoreNodes, RootNode );
21181
+ SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
21168
21182
21169
21183
// Check if there is anything to merge.
21170
21184
if (StoreNodes.size() < 2)
@@ -21220,6 +21234,11 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21220
21234
llvm_unreachable("Unhandled store source type");
21221
21235
}
21222
21236
}
21237
+
21238
+ // Remember if we failed to optimize, to save compile time.
21239
+ if (!MadeChange)
21240
+ ChainsWithoutMergeableStores.insert(RootNode);
21241
+
21223
21242
return MadeChange;
21224
21243
}
21225
21244
0 commit comments