@@ -3618,15 +3618,14 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
36183618 TargetTransformInfo::UnrollingPreferences &UP,
36193619 AArch64TTIImpl &TTI) {
36203620 // Limit loops with structure that is highly likely to benefit from runtime
3621- // unrolling; that is we exclude outer loops, loops with multiple exits and
3622- // many blocks (i.e. likely with complex control flow). Note that the
3623- // heuristics here may be overly conservative and we err on the side of
3624- // avoiding runtime unrolling rather than unroll excessively. They are all
3625- // subject to further refinement.
3626- if (!L->isInnermost () || !L->getExitBlock () || L->getNumBlocks () > 8 )
3621+ // unrolling; that is we exclude outer loops and loops with many blocks (i.e.
3622+ // likely with complex control flow). Note that the heuristics here may be
3623+ // overly conservative and we err on the side of avoiding runtime unrolling
3624+ // rather than unroll excessively. They are all subject to further refinement.
3625+ if (!L->isInnermost () || L->getNumBlocks () > 8 )
36273626 return ;
36283627
3629- const SCEV *BTC = SE.getBackedgeTakenCount (L);
3628+ const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount (L);
36303629 if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
36313630 (SE.getSmallConstantMaxTripCount (L) > 0 &&
36323631 SE.getSmallConstantMaxTripCount (L) <= 32 ))
@@ -3645,6 +3644,28 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
36453644 }
36463645 }
36473646
3647+ // Small search loops with multiple exits can be highly beneficial to unroll.
3648+ if (!L->getExitBlock ()) {
3649+ if (L->getNumBlocks () == 2 && Size < 6 &&
3650+ all_of (
3651+ L->getBlocks (),
3652+ [](BasicBlock *BB) {
3653+ return isa<BranchInst>(BB->getTerminator ());
3654+ })) {
3655+ UP.RuntimeUnrollMultiExit = true ;
3656+ UP.Runtime = true ;
3657+ // Limit unroll count.
3658+ UP.DefaultUnrollRuntimeCount = 4 ;
3659+ // Allow slightly more costly trip-count expansion to catch search loops
3660+ // with pointer inductions.
3661+ UP.SCEVExpansionBudget = 5 ;
3662+ }
3663+ return ;
3664+ }
3665+
3666+ if (SE.getSymbolicMaxBackedgeTakenCount (L) != SE.getBackedgeTakenCount (L))
3667+ return ;
3668+
36483669 // Limit to loops with trip counts that are cheap to expand.
36493670 UP.SCEVExpansionBudget = 1 ;
36503671
0 commit comments