Skip to content

Commit 273e74b

Browse files
[LCSSA] Cache the loop exit blocks across recursive analysis (NFC) (#101087)
The computation of loop exit blocks recently showed up as a huge compile-time cost for a large file. This computation was already cached within a single invocation of formLCSSAForInstructions, but it can also be cached across its callers, formLCSSA and formLCSSARecursively (the latter is what was invoked in the examined case). Since each of these functions is an external entry point invoked from other passes, doing so required splitting each into a worker function that takes a LoopExitBlocks map and an externally callable wrapper that declares the map. That way the map can be passed down from the outermost formLCSSARecursively call. This reduced the time spent in the LCSSA pass from ~110s to ~1s.
1 parent e59c832 commit 273e74b

File tree

1 file changed

+57
-21
lines changed

1 file changed

+57
-21
lines changed

llvm/lib/Transforms/Utils/LCSSA.cpp

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -71,24 +71,26 @@ static bool isExitBlock(BasicBlock *BB,
7171
return is_contained(ExitBlocks, BB);
7272
}
7373

74+
// Cache the Loop ExitBlocks computed during the analysis. We expect to get a
75+
// lot of instructions within the same loops, computing the exit blocks is
76+
// expensive, and we're not mutating the loop structure.
77+
using LoopExitBlocksTy = SmallDenseMap<Loop *, SmallVector<BasicBlock *, 1>>;
78+
7479
/// For every instruction from the worklist, check to see if it has any uses
7580
/// that are outside the current loop. If so, insert LCSSA PHI nodes and
7681
/// rewrite the uses.
77-
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
78-
const DominatorTree &DT, const LoopInfo &LI,
79-
ScalarEvolution *SE,
80-
SmallVectorImpl<PHINode *> *PHIsToRemove,
81-
SmallVectorImpl<PHINode *> *InsertedPHIs) {
82+
static bool
83+
formLCSSAForInstructionsImpl(SmallVectorImpl<Instruction *> &Worklist,
84+
const DominatorTree &DT, const LoopInfo &LI,
85+
ScalarEvolution *SE,
86+
SmallVectorImpl<PHINode *> *PHIsToRemove,
87+
SmallVectorImpl<PHINode *> *InsertedPHIs,
88+
LoopExitBlocksTy &LoopExitBlocks) {
8289
SmallVector<Use *, 16> UsesToRewrite;
8390
SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
8491
PredIteratorCache PredCache;
8592
bool Changed = false;
8693

87-
// Cache the Loop ExitBlocks across this loop. We expect to get a lot of
88-
// instructions within the same loops, computing the exit blocks is
89-
// expensive, and we're not mutating the loop structure.
90-
SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
91-
9294
while (!Worklist.empty()) {
9395
UsesToRewrite.clear();
9496

@@ -317,13 +319,28 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
317319
return Changed;
318320
}
319321

322+
/// For every instruction from the worklist, check to see if it has any uses
323+
/// that are outside the current loop. If so, insert LCSSA PHI nodes and
324+
/// rewrite the uses.
325+
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
326+
const DominatorTree &DT, const LoopInfo &LI,
327+
ScalarEvolution *SE,
328+
SmallVectorImpl<PHINode *> *PHIsToRemove,
329+
SmallVectorImpl<PHINode *> *InsertedPHIs) {
330+
LoopExitBlocksTy LoopExitBlocks;
331+
332+
return formLCSSAForInstructionsImpl(Worklist, DT, LI, SE, PHIsToRemove,
333+
InsertedPHIs, LoopExitBlocks);
334+
}
335+
320336
// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
321337
static void computeBlocksDominatingExits(
322-
Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
338+
Loop &L, const DominatorTree &DT,
339+
const SmallVectorImpl<BasicBlock *> &ExitBlocks,
323340
SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
324341
// We start from the exit blocks, as every block trivially dominates itself
325342
// (not strictly).
326-
SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks);
343+
SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks.begin(), ExitBlocks.end());
327344

328345
while (!BBWorklist.empty()) {
329346
BasicBlock *BB = BBWorklist.pop_back_val();
@@ -360,8 +377,9 @@ static void computeBlocksDominatingExits(
360377
}
361378
}
362379

363-
bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
364-
ScalarEvolution *SE) {
380+
static bool formLCSSAImpl(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
381+
ScalarEvolution *SE,
382+
LoopExitBlocksTy &LoopExitBlocks) {
365383
bool Changed = false;
366384

367385
#ifdef EXPENSIVE_CHECKS
@@ -372,8 +390,9 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
372390
}
373391
#endif
374392

375-
SmallVector<BasicBlock *, 8> ExitBlocks;
376-
L.getExitBlocks(ExitBlocks);
393+
if (!LoopExitBlocks.count(&L))
394+
L.getExitBlocks(LoopExitBlocks[&L]);
395+
const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[&L];
377396
if (ExitBlocks.empty())
378397
return false;
379398

@@ -414,26 +433,43 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
414433
}
415434
}
416435

417-
Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE);
436+
Changed = formLCSSAForInstructionsImpl(Worklist, DT, *LI, SE, nullptr,
437+
nullptr, LoopExitBlocks);
418438

419439
assert(L.isLCSSAForm(DT));
420440

421441
return Changed;
422442
}
423443

444+
bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
445+
ScalarEvolution *SE) {
446+
LoopExitBlocksTy LoopExitBlocks;
447+
448+
return formLCSSAImpl(L, DT, LI, SE, LoopExitBlocks);
449+
}
450+
424451
/// Process a loop nest depth first.
425-
bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT,
426-
const LoopInfo *LI, ScalarEvolution *SE) {
452+
static bool formLCSSARecursivelyImpl(Loop &L, const DominatorTree &DT,
453+
const LoopInfo *LI, ScalarEvolution *SE,
454+
LoopExitBlocksTy &LoopExitBlocks) {
427455
bool Changed = false;
428456

429457
// Recurse depth-first through inner loops.
430458
for (Loop *SubLoop : L.getSubLoops())
431-
Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
459+
Changed |= formLCSSARecursivelyImpl(*SubLoop, DT, LI, SE, LoopExitBlocks);
432460

433-
Changed |= formLCSSA(L, DT, LI, SE);
461+
Changed |= formLCSSAImpl(L, DT, LI, SE, LoopExitBlocks);
434462
return Changed;
435463
}
436464

465+
/// Process a loop nest depth first.
466+
bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT,
467+
const LoopInfo *LI, ScalarEvolution *SE) {
468+
LoopExitBlocksTy LoopExitBlocks;
469+
470+
return formLCSSARecursivelyImpl(L, DT, LI, SE, LoopExitBlocks);
471+
}
472+
437473
/// Process all loops in the function, inner-most out.
438474
static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT,
439475
ScalarEvolution *SE) {

0 commit comments

Comments
 (0)