@@ -3713,7 +3713,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
37133713 // We really should consider hoisting from conditionally executed blocks, if they are frequently executed
37143714 // and it is safe to evaluate the tree early.
37153715 //
3716- ArrayStack<BasicBlock*> defExec (getAllocatorLoopHoist ());
3716+ assert (m_dfsTree != nullptr );
3717+ BitVecTraits traits (m_dfsTree->PostOrderTraits ());
3718+ BitVec defExec (BitVecOps::MakeEmpty (&traits));
37173719
37183720 // Add the pre-headers of any child loops to the list of blocks to consider for hoisting.
37193721 // Note that these are not necessarily definitely executed. However, it is a heuristic that they will
@@ -3771,7 +3773,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
37713773 }
37723774 }
37733775 JITDUMP (" -- " FMT_BB " (child loop pre-header)\n " , childPreHead->bbNum );
3774- defExec. Push ( childPreHead);
3776+ BitVecOps::AddElemD (&traits, defExec, childPreHead-> bbPostorderNum );
37753777 }
37763778
37773779 if (loop->ExitEdges ().size () == 1 )
@@ -3796,7 +3798,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
37963798 while ((cur != nullptr ) && (cur != loop->GetHeader ()) && loop->ContainsBlock (cur))
37973799 {
37983800 JITDUMP (" -- " FMT_BB " (dominate exit block)\n " , cur->bbNum );
3799- defExec. Push ( cur);
3801+ BitVecOps::AddElemD (&traits, defExec, cur-> bbPostorderNum );
38003802 cur = cur->bbIDom ;
38013803 }
38023804
@@ -3812,9 +3814,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
38123814 }
38133815
38143816 JITDUMP (" -- " FMT_BB " (header block)\n " , loop->GetHeader ()->bbNum );
3815- defExec. Push ( loop->GetHeader ());
3817+ BitVecOps::AddElemD (&traits, defExec, loop->GetHeader ()-> bbPostorderNum );
38163818
3817- optHoistLoopBlocks (loop, &defExec, hoistCtxt);
3819+ optHoistLoopBlocks (loop, &traits, defExec, hoistCtxt);
38183820
38193821 unsigned numHoisted = hoistCtxt->m_hoistedFPExprCount + hoistCtxt->m_hoistedExprCount ;
38203822#ifdef FEATURE_MASKED_HW_INTRINSICS
@@ -3823,7 +3825,10 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
38233825 return numHoisted > 0 ;
38243826}
38253827
3826- bool Compiler::optIsProfitableToHoistTree (GenTree* tree, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt)
3828+ bool Compiler::optIsProfitableToHoistTree (GenTree* tree,
3829+ FlowGraphNaturalLoop* loop,
3830+ LoopHoistContext* hoistCtxt,
3831+ bool defExecuted)
38273832{
38283833 bool loopContainsCall = m_loopSideEffects[loop->GetIndex ()].ContainsCall ;
38293834
@@ -3894,6 +3899,14 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l
38943899 // always be a subset of the InOut variables for the loop
38953900 assert (loopVarCount <= varInOutCount);
38963901
3902+ // If this tree isn't guaranteed to execute every loop iteration,
3903+ // consider hoisting it only if it is expensive.
3904+ //
3905+ if (!defExecuted && (tree->GetCostEx () < (IND_COST_EX * 16 )))
3906+ {
3907+ return false ;
3908+ }
3909+
38973910 // When loopVarCount >= availRegCount we believe that all of the
38983911 // available registers will get used to hold LclVars inside the loop.
38993912 // This pessimistically assumes that each loopVar has a conflicting
@@ -4112,17 +4125,14 @@ void LoopSideEffects::AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE s
41124125//
41134126// Arguments:
41144127// loop - The loop
4115- // blocks - A stack of blocks belonging to the loop
4128+ // traits - Bit vector traits for 'defExecuted'
4129+ // defExecuted - Bit vector of definitely executed blocks in the loop
41164130// hoistContext - The loop hoist context
41174131//
4118- // Assumptions:
4119- // The `blocks` stack contains the definitely-executed blocks in
4120- // the loop, in the execution order, starting with the loop entry
4121- // block on top of the stack.
4122- //
4123- void Compiler::optHoistLoopBlocks (FlowGraphNaturalLoop* loop,
4124- ArrayStack<BasicBlock*>* blocks,
4125- LoopHoistContext* hoistContext)
4132+ void Compiler::optHoistLoopBlocks (FlowGraphNaturalLoop* loop,
4133+ BitVecTraits* traits,
4134+ BitVec defExecuted,
4135+ LoopHoistContext* hoistContext)
41264136{
41274137 class HoistVisitor : public GenTreeVisitor <HoistVisitor>
41284138 {
@@ -4161,6 +4171,8 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
41614171 FlowGraphNaturalLoop* m_loop;
41624172 LoopHoistContext* m_hoistContext;
41634173 BasicBlock* m_currentBlock;
4174+ BitVecTraits* m_traits;
4175+ BitVec m_defExec;
41644176
41654177 bool IsNodeHoistable (GenTree* node)
41664178 {
@@ -4283,13 +4295,19 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
42834295 UseExecutionOrder = true ,
42844296 };
42854297
4286- HoistVisitor (Compiler* compiler, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistContext)
4298+ HoistVisitor (Compiler* compiler,
4299+ FlowGraphNaturalLoop* loop,
4300+ LoopHoistContext* hoistContext,
4301+ BitVecTraits* traits,
4302+ BitVec defExec)
42874303 : GenTreeVisitor(compiler)
42884304 , m_valueStack(compiler->getAllocator (CMK_LoopHoist))
42894305 , m_beforeSideEffect(true )
42904306 , m_loop(loop)
42914307 , m_hoistContext(hoistContext)
42924308 , m_currentBlock(nullptr )
4309+ , m_traits(traits)
4310+ , m_defExec(defExec)
42934311 {
42944312 }
42954313
@@ -4305,7 +4323,8 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
43054323 // hoist the top node?
43064324 if (top.m_hoistable )
43074325 {
4308- m_compiler->optHoistCandidate (stmt->GetRootNode (), block, m_loop, m_hoistContext);
4326+ const bool defExecuted = BitVecOps::IsMember (m_traits, m_defExec, block->bbPostorderNum );
4327+ m_compiler->optHoistCandidate (stmt->GetRootNode (), block, m_loop, m_hoistContext, defExecuted);
43094328 }
43104329 else
43114330 {
@@ -4655,7 +4674,10 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
46554674
46564675 if (IsHoistableOverExcepSibling (value.Node (), hasExcep))
46574676 {
4658- m_compiler->optHoistCandidate (value.Node (), m_currentBlock, m_loop, m_hoistContext);
4677+ const bool defExecuted =
4678+ BitVecOps::IsMember (m_traits, m_defExec, m_currentBlock->bbPostorderNum );
4679+ m_compiler->optHoistCandidate (value.Node (), m_currentBlock, m_loop, m_hoistContext,
4680+ defExecuted);
46594681 }
46604682
46614683 // Don't hoist this tree again.
@@ -4701,12 +4723,9 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
47014723 }
47024724 };
47034725
4704- HoistVisitor visitor (this , loop, hoistContext);
4705-
4706- while (!blocks->Empty ())
4707- {
4708- BasicBlock* block = blocks->Pop ();
4709- weight_t blockWeight = block->getBBWeight (this );
4726+ HoistVisitor visitor (this , loop, hoistContext, traits, defExecuted);
4727+ loop->VisitLoopBlocks ([&](BasicBlock* block) -> BasicBlockVisit {
4728+ const weight_t blockWeight = block->getBBWeight (this );
47104729
47114730 JITDUMP (" \n optHoistLoopBlocks " FMT_BB " (weight=%6s) of loop " FMT_LP " (head: " FMT_BB " )\n " ,
47124731 block->bbNum , refCntWtd2str (blockWeight, /* padForDecimalPlaces */ true ), loop->GetIndex (),
@@ -4715,22 +4734,23 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
47154734 if (blockWeight < (BB_UNITY_WEIGHT / 10 ))
47164735 {
47174736 JITDUMP (" block weight is too small to perform hoisting.\n " );
4718- continue ;
4737+ }
4738+ else
4739+ {
4740+ visitor.HoistBlock (block);
47194741 }
47204742
4721- visitor. HoistBlock (block) ;
4722- }
4743+ return BasicBlockVisit::Continue ;
4744+ });
47234745
47244746 hoistContext->ResetHoistedInCurLoop ();
47254747}
47264748
4727- void Compiler::optHoistCandidate (GenTree* tree,
4728- BasicBlock* treeBb,
4729- FlowGraphNaturalLoop* loop,
4730- LoopHoistContext* hoistCtxt)
4749+ void Compiler::optHoistCandidate (
4750+ GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt, bool defExecuted)
47314751{
47324752 // It must pass the hoistable profitability tests for this loop level
4733- if (!optIsProfitableToHoistTree (tree, loop, hoistCtxt))
4753+ if (!optIsProfitableToHoistTree (tree, loop, hoistCtxt, defExecuted ))
47344754 {
47354755 JITDUMP (" ... not profitable to hoist\n " );
47364756 return ;
0 commit comments