Skip to content

Commit bb5f004

Browse files
authored
Merge pull request #10114 from fhahn/rt-multi-exit-unroll-stable
[AArch64] Runtime-unroll small multi-exit loops on Apple Silicon.
2 parents c6dc6ed + c72fd01 commit bb5f004

File tree

8 files changed

+565
-17
lines changed

8 files changed

+565
-17
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -623,6 +623,11 @@ class TargetTransformInfo {
623623
/// Don't allow runtime unrolling if expanding the trip count takes more
624624
/// than SCEVExpansionBudget.
625625
unsigned SCEVExpansionBudget;
626+
/// Allow runtime unrolling of multi-exit loops. Should only be set if the
627+
/// target determined that multi-exit unrolling is profitable for the loop.
628+
/// Fall back to the generic logic to determine whether multi-exit unrolling
629+
/// is profitable if set to false.
630+
bool RuntimeUnrollMultiExit;
626631
};
627632

628633
/// Get target-customized preferences for the generic loop unrolling

llvm/include/llvm/Transforms/Utils/UnrollLoop.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ struct UnrollLoopOptions {
7676
bool ForgetAllSCEV;
7777
const Instruction *Heart = nullptr;
7878
unsigned SCEVExpansionBudget;
79+
bool RuntimeUnrollMultiExit = false;
7980
};
8081

8182
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
@@ -91,7 +92,8 @@ bool UnrollRuntimeLoopRemainder(
9192
bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
9293
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
9394
const TargetTransformInfo *TTI, bool PreserveLCSSA,
94-
unsigned SCEVExpansionBudget, Loop **ResultLoop = nullptr);
95+
unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit,
96+
Loop **ResultLoop = nullptr);
9597

9698
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
9799
unsigned TripMultiple, bool UnrollRemainder,

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 28 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3618,15 +3618,14 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
36183618
TargetTransformInfo::UnrollingPreferences &UP,
36193619
AArch64TTIImpl &TTI) {
36203620
// Limit to loops with structure that is highly likely to benefit from runtime
3621-
// unrolling; that is we exclude outer loops, loops with multiple exits and
3622-
// many blocks (i.e. likely with complex control flow). Note that the
3623-
// heuristics here may be overly conservative and we err on the side of
3624-
// avoiding runtime unrolling rather than unroll excessively. They are all
3625-
// subject to further refinement.
3626-
if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)
3621+
// unrolling; that is we exclude outer loops and loops with many blocks (i.e.
3622+
// likely with complex control flow). Note that the heuristics here may be
3623+
// overly conservative and we err on the side of avoiding runtime unrolling
3624+
// rather than unroll excessively. They are all subject to further refinement.
3625+
if (!L->isInnermost() || L->getNumBlocks() > 8)
36273626
return;
36283627

3629-
const SCEV *BTC = SE.getBackedgeTakenCount(L);
3628+
const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
36303629
if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
36313630
(SE.getSmallConstantMaxTripCount(L) > 0 &&
36323631
SE.getSmallConstantMaxTripCount(L) <= 32))
@@ -3645,6 +3644,28 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
36453644
}
36463645
}
36473646

3647+
// Small search loops with multiple exits can be highly beneficial to unroll.
3648+
if (!L->getExitBlock()) {
3649+
if (L->getNumBlocks() == 2 && Size < 6 &&
3650+
all_of(
3651+
L->getBlocks(),
3652+
[](BasicBlock *BB) {
3653+
return isa<BranchInst>(BB->getTerminator());
3654+
})) {
3655+
UP.RuntimeUnrollMultiExit = true;
3656+
UP.Runtime = true;
3657+
// Limit unroll count.
3658+
UP.DefaultUnrollRuntimeCount = 4;
3659+
// Allow slightly more costly trip-count expansion to catch search loops
3660+
// with pointer inductions.
3661+
UP.SCEVExpansionBudget = 5;
3662+
}
3663+
return;
3664+
}
3665+
3666+
if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
3667+
return;
3668+
36483669
// Limit to loops with trip counts that are cheap to expand.
36493670
UP.SCEVExpansionBudget = 1;
36503671

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
221221
UP.UnrollAndJamInnerLoopThreshold = 60;
222222
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
223223
UP.SCEVExpansionBudget = SCEVCheapExpansionBudget;
224+
UP.RuntimeUnrollMultiExit = false;
224225

225226
// Override with any target specific settings
226227
TTI.getUnrollingPreferences(L, SE, UP, &ORE);
@@ -1353,6 +1354,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
13531354
ULO.ForgetAllSCEV = ForgetAllSCEV;
13541355
ULO.Heart = getLoopConvergenceHeart(L);
13551356
ULO.SCEVExpansionBudget = UP.SCEVExpansionBudget;
1357+
ULO.RuntimeUnrollMultiExit = UP.RuntimeUnrollMultiExit;
13561358
LoopUnrollResult UnrollResult = UnrollLoop(
13571359
L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
13581360
if (UnrollResult == LoopUnrollResult::Unmodified)

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -593,10 +593,11 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
593593
: isEpilogProfitable(L);
594594

595595
if (ULO.Runtime &&
596-
!UnrollRuntimeLoopRemainder(
597-
L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability,
598-
ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
599-
PreserveLCSSA, ULO.SCEVExpansionBudget, RemainderLoop)) {
596+
!UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
597+
EpilogProfitability, ULO.UnrollRemainder,
598+
ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
599+
PreserveLCSSA, ULO.SCEVExpansionBudget,
600+
ULO.RuntimeUnrollMultiExit, RemainderLoop)) {
600601
if (ULO.Force)
601602
ULO.Runtime = false;
602603
else {

llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -462,7 +462,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
462462

463463
/// Returns true if we can profitably runtime-unroll the multi-exit loop L. Currently,
464464
/// we return true only if UnrollRuntimeMultiExit is set to true.
465-
static bool canProfitablyUnrollMultiExitLoop(
465+
static bool canProfitablyRuntimeUnrollMultiExitLoop(
466466
Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
467467
bool UseEpilogRemainder) {
468468

@@ -584,7 +584,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
584584
bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
585585
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
586586
const TargetTransformInfo *TTI, bool PreserveLCSSA,
587-
unsigned SCEVExpansionBudget, Loop **ResultLoop) {
587+
unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit,
588+
Loop **ResultLoop) {
588589
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
589590
LLVM_DEBUG(L->dump());
590591
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -633,8 +634,9 @@ bool llvm::UnrollRuntimeLoopRemainder(
633634
if (!PreserveLCSSA)
634635
return false;
635636

636-
if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
637-
UseEpilogRemainder)) {
637+
if (!RuntimeUnrollMultiExit &&
638+
!canProfitablyRuntimeUnrollMultiExitLoop(L, OtherExits, LatchExit,
639+
UseEpilogRemainder)) {
638640
LLVM_DEBUG(
639641
dbgs()
640642
<< "Multiple exit/exiting blocks in loop and multi-exit unrolling not "

0 commit comments

Comments
 (0)