Skip to content

Commit 2fe0457

Browse files
JIT: Consider conditionally executed loop statements for hoisting (#117829)
Fixes the roughly 10x regression in #116486. This quirks loop hoisting to iterate all loop blocks, and consider hoisting expensive statements even if they don't execute every iteration. The `(IND_COST_EX * 16)` cutoff was chosen to minimize churn, while still solving the above regression.
1 parent 7c5ea10 commit 2fe0457

File tree

2 files changed

+55
-35
lines changed

2 files changed

+55
-35
lines changed

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6954,13 +6954,13 @@ class Compiler
69546954

69556955
// Hoist all expressions in "blocks" that are invariant in "loop"
69566956
// outside of that loop.
6957-
void optHoistLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack<BasicBlock*>* blocks, LoopHoistContext* hoistContext);
6957+
void optHoistLoopBlocks(FlowGraphNaturalLoop* loop, BitVecTraits* traits, BitVec blocks, LoopHoistContext* hoistContext);
69586958

69596959
// Return true if the tree looks profitable to hoist out of "loop"
6960-
bool optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt);
6960+
bool optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt, bool defExecuted);
69616961

69626962
// Performs the hoisting "tree" into the PreHeader for "loop"
6963-
void optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt);
6963+
void optHoistCandidate(GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt, bool defExecuted);
69646964

69656965
// Note the new SSA uses in tree
69666966
void optRecordSsaUses(GenTree* tree, BasicBlock* block);

src/coreclr/jit/optimizer.cpp

Lines changed: 52 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3713,7 +3713,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
37133713
// We really should consider hoisting from conditionally executed blocks, if they are frequently executed
37143714
// and it is safe to evaluate the tree early.
37153715
//
3716-
ArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
3716+
assert(m_dfsTree != nullptr);
3717+
BitVecTraits traits(m_dfsTree->PostOrderTraits());
3718+
BitVec defExec(BitVecOps::MakeEmpty(&traits));
37173719

37183720
// Add the pre-headers of any child loops to the list of blocks to consider for hoisting.
37193721
// Note that these are not necessarily definitely executed. However, it is a heuristic that they will
@@ -3771,7 +3773,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
37713773
}
37723774
}
37733775
JITDUMP(" -- " FMT_BB " (child loop pre-header)\n", childPreHead->bbNum);
3774-
defExec.Push(childPreHead);
3776+
BitVecOps::AddElemD(&traits, defExec, childPreHead->bbPostorderNum);
37753777
}
37763778

37773779
if (loop->ExitEdges().size() == 1)
@@ -3796,7 +3798,7 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
37963798
while ((cur != nullptr) && (cur != loop->GetHeader()) && loop->ContainsBlock(cur))
37973799
{
37983800
JITDUMP(" -- " FMT_BB " (dominate exit block)\n", cur->bbNum);
3799-
defExec.Push(cur);
3801+
BitVecOps::AddElemD(&traits, defExec, cur->bbPostorderNum);
38003802
cur = cur->bbIDom;
38013803
}
38023804

@@ -3812,9 +3814,9 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
38123814
}
38133815

38143816
JITDUMP(" -- " FMT_BB " (header block)\n", loop->GetHeader()->bbNum);
3815-
defExec.Push(loop->GetHeader());
3817+
BitVecOps::AddElemD(&traits, defExec, loop->GetHeader()->bbPostorderNum);
38163818

3817-
optHoistLoopBlocks(loop, &defExec, hoistCtxt);
3819+
optHoistLoopBlocks(loop, &traits, defExec, hoistCtxt);
38183820

38193821
unsigned numHoisted = hoistCtxt->m_hoistedFPExprCount + hoistCtxt->m_hoistedExprCount;
38203822
#ifdef FEATURE_MASKED_HW_INTRINSICS
@@ -3823,7 +3825,10 @@ bool Compiler::optHoistThisLoop(FlowGraphNaturalLoop* loop, LoopHoistContext* ho
38233825
return numHoisted > 0;
38243826
}
38253827

3826-
bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt)
3828+
bool Compiler::optIsProfitableToHoistTree(GenTree* tree,
3829+
FlowGraphNaturalLoop* loop,
3830+
LoopHoistContext* hoistCtxt,
3831+
bool defExecuted)
38273832
{
38283833
bool loopContainsCall = m_loopSideEffects[loop->GetIndex()].ContainsCall;
38293834

@@ -3894,6 +3899,14 @@ bool Compiler::optIsProfitableToHoistTree(GenTree* tree, FlowGraphNaturalLoop* l
38943899
// always be a subset of the InOut variables for the loop
38953900
assert(loopVarCount <= varInOutCount);
38963901

3902+
// If this tree isn't guaranteed to execute every loop iteration,
3903+
// consider hoisting it only if it is expensive.
3904+
//
3905+
if (!defExecuted && (tree->GetCostEx() < (IND_COST_EX * 16)))
3906+
{
3907+
return false;
3908+
}
3909+
38973910
// When loopVarCount >= availRegCount we believe that all of the
38983911
// available registers will get used to hold LclVars inside the loop.
38993912
// This pessimistically assumes that each loopVar has a conflicting
@@ -4112,17 +4125,14 @@ void LoopSideEffects::AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE s
41124125
//
41134126
// Arguments:
41144127
// loop - The loop
4115-
// blocks - A stack of blocks belonging to the loop
4128+
// traits - Bit vector traits for 'defExecuted'
4129+
// defExecuted - Bit vector of definitely executed blocks in the loop
41164130
// hoistContext - The loop hoist context
41174131
//
4118-
// Assumptions:
4119-
// The `blocks` stack contains the definitely-executed blocks in
4120-
// the loop, in the execution order, starting with the loop entry
4121-
// block on top of the stack.
4122-
//
4123-
void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
4124-
ArrayStack<BasicBlock*>* blocks,
4125-
LoopHoistContext* hoistContext)
4132+
void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
4133+
BitVecTraits* traits,
4134+
BitVec defExecuted,
4135+
LoopHoistContext* hoistContext)
41264136
{
41274137
class HoistVisitor : public GenTreeVisitor<HoistVisitor>
41284138
{
@@ -4161,6 +4171,8 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
41614171
FlowGraphNaturalLoop* m_loop;
41624172
LoopHoistContext* m_hoistContext;
41634173
BasicBlock* m_currentBlock;
4174+
BitVecTraits* m_traits;
4175+
BitVec m_defExec;
41644176

41654177
bool IsNodeHoistable(GenTree* node)
41664178
{
@@ -4283,13 +4295,19 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
42834295
UseExecutionOrder = true,
42844296
};
42854297

4286-
HoistVisitor(Compiler* compiler, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistContext)
4298+
HoistVisitor(Compiler* compiler,
4299+
FlowGraphNaturalLoop* loop,
4300+
LoopHoistContext* hoistContext,
4301+
BitVecTraits* traits,
4302+
BitVec defExec)
42874303
: GenTreeVisitor(compiler)
42884304
, m_valueStack(compiler->getAllocator(CMK_LoopHoist))
42894305
, m_beforeSideEffect(true)
42904306
, m_loop(loop)
42914307
, m_hoistContext(hoistContext)
42924308
, m_currentBlock(nullptr)
4309+
, m_traits(traits)
4310+
, m_defExec(defExec)
42934311
{
42944312
}
42954313

@@ -4305,7 +4323,8 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
43054323
// hoist the top node?
43064324
if (top.m_hoistable)
43074325
{
4308-
m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loop, m_hoistContext);
4326+
const bool defExecuted = BitVecOps::IsMember(m_traits, m_defExec, block->bbPostorderNum);
4327+
m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loop, m_hoistContext, defExecuted);
43094328
}
43104329
else
43114330
{
@@ -4655,7 +4674,10 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
46554674

46564675
if (IsHoistableOverExcepSibling(value.Node(), hasExcep))
46574676
{
4658-
m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loop, m_hoistContext);
4677+
const bool defExecuted =
4678+
BitVecOps::IsMember(m_traits, m_defExec, m_currentBlock->bbPostorderNum);
4679+
m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loop, m_hoistContext,
4680+
defExecuted);
46594681
}
46604682

46614683
// Don't hoist this tree again.
@@ -4701,12 +4723,9 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
47014723
}
47024724
};
47034725

4704-
HoistVisitor visitor(this, loop, hoistContext);
4705-
4706-
while (!blocks->Empty())
4707-
{
4708-
BasicBlock* block = blocks->Pop();
4709-
weight_t blockWeight = block->getBBWeight(this);
4726+
HoistVisitor visitor(this, loop, hoistContext, traits, defExecuted);
4727+
loop->VisitLoopBlocks([&](BasicBlock* block) -> BasicBlockVisit {
4728+
const weight_t blockWeight = block->getBBWeight(this);
47104729

47114730
JITDUMP("\n optHoistLoopBlocks " FMT_BB " (weight=%6s) of loop " FMT_LP " (head: " FMT_BB ")\n",
47124731
block->bbNum, refCntWtd2str(blockWeight, /* padForDecimalPlaces */ true), loop->GetIndex(),
@@ -4715,22 +4734,23 @@ void Compiler::optHoistLoopBlocks(FlowGraphNaturalLoop* loop,
47154734
if (blockWeight < (BB_UNITY_WEIGHT / 10))
47164735
{
47174736
JITDUMP(" block weight is too small to perform hoisting.\n");
4718-
continue;
4737+
}
4738+
else
4739+
{
4740+
visitor.HoistBlock(block);
47194741
}
47204742

4721-
visitor.HoistBlock(block);
4722-
}
4743+
return BasicBlockVisit::Continue;
4744+
});
47234745

47244746
hoistContext->ResetHoistedInCurLoop();
47254747
}
47264748

4727-
void Compiler::optHoistCandidate(GenTree* tree,
4728-
BasicBlock* treeBb,
4729-
FlowGraphNaturalLoop* loop,
4730-
LoopHoistContext* hoistCtxt)
4749+
void Compiler::optHoistCandidate(
4750+
GenTree* tree, BasicBlock* treeBb, FlowGraphNaturalLoop* loop, LoopHoistContext* hoistCtxt, bool defExecuted)
47314751
{
47324752
// It must pass the hoistable profitability tests for this loop level
4733-
if (!optIsProfitableToHoistTree(tree, loop, hoistCtxt))
4753+
if (!optIsProfitableToHoistTree(tree, loop, hoistCtxt, defExecuted))
47344754
{
47354755
JITDUMP(" ... not profitable to hoist\n");
47364756
return;

0 commit comments

Comments
 (0)