@@ -98,6 +98,9 @@ static cl::opt<bool> EnableUnswitchCostMultiplier(
// Hidden integer command-line option with a default of 2. Per its own
// description it is the divisor used for top-level siblings when the unswitch
// cost multiplier is computed; the use site is not visible in this excerpt, so
// confirm the exact formula there.
9898static cl::opt<int > UnswitchSiblingsToplevelDiv (
9999 " unswitch-siblings-toplevel-div" , cl::init(2 ), cl::Hidden,
100100 cl::desc(" Toplevel siblings divisor for cost multiplier." ));
// Hidden integer command-line option with a default of 8. It is the divisor
// applied to the parent loop's basic-block count (ParentL->getNumBlocks()) to
// derive the parent-loop-size factor of the unswitch cost multiplier; the
// quotient is clamped to a minimum of 1 at the use site.
// The use site performs a plain integer division with no zero check, so a
// value of 0 for this option would divide by zero.
101+ static cl::opt<int > UnswitchParentBlocksDiv (
102+ " unswitch-parent-blocks-div" , cl::init(8 ), cl::Hidden,
103+ cl::desc(" Outer loop size divisor for cost multiplier." ));
101104static cl::opt<int > UnswitchNumInitialUnscaledCandidates (
102105 " unswitch-num-initial-unscaled-candidates" , cl::init(8 ), cl::Hidden,
103106 cl::desc(" Number of unswitch candidates that are ignored when calculating "
@@ -2809,9 +2812,9 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
28092812}
28102813
28112814// / Cost multiplier is a way to limit potentially exponential behavior
2812- // / of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2813- // / candidates available. Also accounting for the number of "sibling" loops with
2814- // / the idea to account for previous unswitches that already happened on this
2815+ // / of loop-unswitch. Cost is multiplied in proportion to 2^number of unswitch
2816+ // / candidates available. Also consider the number of "sibling" loops with
2817+ // / the idea of accounting for previous unswitches that already happened on this
28152818// / cluster of loops. There was an attempt to keep this formula simple,
28162819// / just enough to limit the worst case behavior. Even if it is not that simple
28172820// / now it is still not an attempt to provide a detailed heuristic size
@@ -2842,7 +2845,19 @@ static int CalculateUnswitchCostMultiplier(
28422845 return 1 ;
28432846 }
28442847
2848+ // Each invariant non-trivial condition, after being unswitched, is supposed
2849+ // to have its own specialized sibling loop (the invariant condition has been
2850+ // hoisted out of the child loop into a newly-cloned loop). When unswitching
2851+ // conditions in nested loops, the basic block size of the outer loop should
2852+ // not be altered. If such a size significantly increases across unswitching
2853+ // invocations, something may be wrong; so adjust the final cost taking this
2854+ // into account.
28452855 auto *ParentL = L.getParentLoop ();
2856+ int ParentLoopSizeMultiplier = 1 ;
2857+ if (ParentL)
2858+ ParentLoopSizeMultiplier =
2859+ std::max<int >(ParentL->getNumBlocks () / UnswitchParentBlocksDiv, 1 );
2860+
28462861 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector ().size ()
28472862 : std::distance (LI.begin (), LI.end ()));
28482863 // Count amount of clones that all the candidates might cause during
@@ -2887,14 +2902,16 @@ static int CalculateUnswitchCostMultiplier(
28872902 // at an upper bound.
28882903 int CostMultiplier;
28892904 if (ClonesPower > Log2_32 (UnswitchThreshold) ||
2890- SiblingsMultiplier > UnswitchThreshold)
2905+ SiblingsMultiplier > UnswitchThreshold ||
2906+ ParentLoopSizeMultiplier > UnswitchThreshold)
28912907 CostMultiplier = UnswitchThreshold;
28922908 else
28932909 CostMultiplier = std::min (SiblingsMultiplier * (1 << ClonesPower),
28942910 (int )UnswitchThreshold);
28952911
28962912 LLVM_DEBUG (dbgs () << " Computed multiplier " << CostMultiplier
2897- << " (siblings " << SiblingsMultiplier << " * clones "
2913+ << " (siblings " << SiblingsMultiplier << " * parent size "
2914+ << ParentLoopSizeMultiplier << " * clones "
28982915 << (1 << ClonesPower) << " )"
28992916 << " for unswitch candidate: " << TI << " \n " );
29002917 return CostMultiplier;
0 commit comments