Skip to content

Commit b595b8c

Browse files
committed
[SCEV] Collect and merge loop guards through PHI nodes with multiple incoming Values
1 parent 9f3728d commit b595b8c

File tree

4 files changed

+164
-10
lines changed

4 files changed

+164
-10
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,11 @@ class ScalarEvolution {
13161316

13171317
LoopGuards(ScalarEvolution &SE) : SE(SE) {}
13181318

1319+
/// Collect guard information for \p Block into \p Guards, using \p Pred as
/// the predecessor from which the walk up the predecessor chain starts.
/// \p SE is the ScalarEvolution instance the guards are built against.
/// \p VisitedBlocks is taken by value, so every call operates on its own
/// copy of the set of blocks already seen.
// NOTE(review): the function both takes \p Guards by reference and returns a
// LoopGuards by value; confirm against the definition which of the two the
// caller is expected to consume.
static LoopGuards
1320+
collectFromBlock(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
1321+
const BasicBlock *Block, const BasicBlock *Pred,
1322+
SmallPtrSet<const BasicBlock *, 8> VisitedBlocks);
1323+
13191324
public:
13201325
/// Collect rewrite map for loop guards for loop \p L, together with flags
13211326
/// indicating if NUW and NSW can be preserved during rewriting.

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10648,7 +10648,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
1064810648
if (const Loop *L = LI.getLoopFor(BB))
1064910649
return {L->getLoopPredecessor(), L->getHeader()};
1065010650

10651-
return {nullptr, nullptr};
10651+
return {nullptr, BB};
1065210652
}
1065310653

1065410654
/// SCEV structural equivalence is usually sufficient for testing whether two
@@ -15217,7 +15217,16 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
1521715217

1521815218
/// Collect the loop guards for loop \p L using \p SE.
///
/// This is a thin entry point: it looks up the loop's header and its loop
/// predecessor, creates an empty LoopGuards bound to \p SE, and forwards all
/// of them to collectFromBlock together with an empty visited-block set.
/// \returns whatever collectFromBlock returns for the header.
ScalarEvolution::LoopGuards
1521915219
ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
15220+
BasicBlock *Header = L->getHeader();
15221+
// NOTE(review): presumably null when the loop has no unique predecessor;
// the predecessor climb in collectFromBlock tests the pointer before use.
BasicBlock *Pred = L->getLoopPredecessor();
1522015222
LoopGuards Guards(SE);
15223+
// `{}` starts the walk with no blocks marked as visited.
return collectFromBlock(SE, Guards, Header, Pred, {});
15224+
}
15225+
15226+
ScalarEvolution::LoopGuards ScalarEvolution::LoopGuards::collectFromBlock(
15227+
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
15228+
const BasicBlock *Block, const BasicBlock *Pred,
15229+
SmallPtrSet<const BasicBlock *, 8> VisitedBlocks) {
1522115230
SmallVector<const SCEV *> ExprsToRewrite;
1522215231
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
1522315232
const SCEV *RHS,
@@ -15556,14 +15565,13 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
1555615565
}
1555715566
};
1555815567

15559-
BasicBlock *Header = L->getHeader();
1556015568
SmallVector<PointerIntPair<Value *, 1, bool>> Terms;
1556115569
// First, collect information from assumptions dominating the loop.
1556215570
for (auto &AssumeVH : SE.AC.assumptions()) {
1556315571
if (!AssumeVH)
1556415572
continue;
1556515573
auto *AssumeI = cast<CallInst>(AssumeVH);
15566-
if (!SE.DT.dominates(AssumeI, Header))
15574+
if (!SE.DT.dominates(AssumeI, Block))
1556715575
continue;
1556815576
Terms.emplace_back(AssumeI->getOperand(0), true);
1556915577
}
@@ -15574,20 +15582,19 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
1557415582
if (GuardDecl)
1557515583
for (const auto *GU : GuardDecl->users())
1557615584
if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
15577-
if (Guard->getFunction() == Header->getParent() &&
15578-
SE.DT.dominates(Guard, Header))
15585+
if (Guard->getFunction() == Block->getParent() &&
15586+
SE.DT.dominates(Guard, Block))
1557915587
Terms.emplace_back(Guard->getArgOperand(0), true);
1558015588

1558115589
// Third, collect conditions from dominating branches. Starting at the loop
1558215590
// predecessor, climb up the predecessor chain, as long as there are
1558315591
// predecessors that can be found that have unique successors leading to the
1558415592
// original header.
1558515593
// TODO: share this logic with isLoopEntryGuardedByCond.
15586-
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
15587-
L->getLoopPredecessor(), Header);
15588-
Pair.first;
15594+
std::pair<const BasicBlock *, const BasicBlock *> Pair(Pred, Block);
15595+
for (; Pair.first;
1558915596
Pair = SE.getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
15590-
15597+
VisitedBlocks.insert(Pair.second);
1559115598
const BranchInst *LoopEntryPredicate =
1559215599
dyn_cast<BranchInst>(Pair.first->getTerminator());
1559315600
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
@@ -15596,6 +15603,66 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
1559615603
Terms.emplace_back(LoopEntryPredicate->getCondition(),
1559715604
LoopEntryPredicate->getSuccessor(0) == Pair.second);
1559815605
}
15606+
// Finally, if we stopped climbing the predecessor chain because
15607+
// there wasn't a unique one to continue, try to collect conditions
15608+
// for PHINodes by recursively following all of their incoming
15609+
// blocks and try to merge the found conditions to build a new one
15610+
// for the Phi.
15611+
if (Pair.second->hasNPredecessorsOrMore(2)) {
15612+
for (auto &Phi : Pair.second->phis()) {
15613+
if (!SE.isSCEVable(Phi.getType()))
15614+
continue;
15615+
15616+
using MinMaxPattern = std::pair<const SCEVConstant *, SCEVTypes>;
15617+
auto GetMinMaxConst = [&SE, &VisitedBlocks, &Pair,
15618+
&Phi](unsigned int In) -> MinMaxPattern {
15619+
LoopGuards G(SE);
15620+
if (VisitedBlocks.insert(Phi.getIncomingBlock(In)).second)
15621+
collectFromBlock(SE, G, Pair.second, Phi.getIncomingBlock(In),
15622+
VisitedBlocks);
15623+
const SCEV *S = G.RewriteMap[SE.getSCEV(Phi.getIncomingValue(In))];
15624+
auto *SM = dyn_cast_if_present<SCEVMinMaxExpr>(S);
15625+
if (!SM)
15626+
return {nullptr, scCouldNotCompute};
15627+
if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(SM->getOperand(0)))
15628+
return {C0, SM->getSCEVType()};
15629+
if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(SM->getOperand(1)))
15630+
return {C1, SM->getSCEVType()};
15631+
return {nullptr, scCouldNotCompute};
15632+
};
15633+
auto MergeMinMaxConst = [](MinMaxPattern P1,
15634+
MinMaxPattern P2) -> MinMaxPattern {
15635+
auto [C1, T1] = P1;
15636+
auto [C2, T2] = P2;
15637+
if (!C1 || !C2 || T1 != T2)
15638+
return {nullptr, scCouldNotCompute};
15639+
switch (T1) {
15640+
case scUMaxExpr:
15641+
return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1};
15642+
case scSMaxExpr:
15643+
return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1};
15644+
case scUMinExpr:
15645+
return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1};
15646+
case scSMinExpr:
15647+
return {C1->getAPInt().sgt(C2->getAPInt()) ? C1 : C2, T1};
15648+
default:
15649+
llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs.");
15650+
}
15651+
};
15652+
auto P = GetMinMaxConst(0);
15653+
for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) {
15654+
if (!P.first)
15655+
break;
15656+
P = MergeMinMaxConst(P, GetMinMaxConst(In));
15657+
}
15658+
if (P.first) {
15659+
const SCEV *LHS = SE.getSCEV(const_cast<PHINode *>(&Phi));
15660+
SmallVector<const SCEV *, 2> Ops({P.first, LHS});
15661+
const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops);
15662+
Guards.RewriteMap.insert({LHS, RHS});
15663+
}
15664+
}
15665+
}
1559915666

1560015667
// Now apply the information from the collected conditions to
1560115668
// Guards.RewriteMap. Conditions are processed in reverse order, so the

llvm/test/Analysis/ScalarEvolution/trip-count.ll

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,85 @@ for.body:
211211
exit:
212212
ret void
213213
}
214+
215+
; Main loop followed by a remainder ("epilogue") loop. %epilogue.preheader is
; reached either straight from %entry (taken when %count is not ugt 7) or from
; %while.loopexit (taken when %sub is not ugt 7), so both values merged by the
; %count.epilogue phi are at most 7. The epilogue loop is only entered when
; %count.epilogue is non-zero and counts it down by 1, which matches the
; expected constant max backedge-taken count of 6 for %epilogue below.
define void @epilogue(i64 %count) {
216+
; CHECK-LABEL: 'epilogue'
217+
; CHECK-NEXT: Determining loop execution counts for: @epilogue
218+
; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue)
219+
; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 6
220+
; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue)
221+
; CHECK-NEXT: Loop %epilogue: Trip multiple is 1
222+
; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8)
223+
; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951
224+
; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8)
225+
; CHECK-NEXT: Loop %while.body: Trip multiple is 1
226+
entry:
227+
; Skip the main loop entirely when fewer than 8 items are requested.
%cmp = icmp ugt i64 %count, 7
228+
br i1 %cmp, label %while.body, label %epilogue.preheader
229+
230+
while.body:
231+
; Main loop: consumes 8 per iteration and repeats while more than 7 remain.
%iv = phi i64 [ %sub, %while.body ], [ %count, %entry ]
232+
%sub = add i64 %iv, -8
233+
%exitcond.not = icmp ugt i64 %sub, 7
234+
br i1 %exitcond.not, label %while.body, label %while.loopexit
235+
236+
while.loopexit:
237+
%sub.exit = phi i64 [ %sub, %while.body ]
238+
br label %epilogue.preheader
239+
240+
epilogue.preheader:
241+
; Two-predecessor join: the remaining count comes from either incoming edge.
%count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ]
242+
%epilogue.cmp = icmp eq i64 %count.epilogue, 0
243+
br i1 %epilogue.cmp, label %exit, label %epilogue
244+
245+
epilogue:
246+
; Remainder loop: counts the leftover down by 1 until it reaches zero.
%iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ]
247+
%dec = add i64 %iv.epilogue, -1
248+
%exitcond.epilogue = icmp eq i64 %dec, 0
249+
br i1 %exitcond.epilogue, label %exit, label %epilogue
250+
251+
exit:
252+
ret void
253+
254+
}
255+
256+
; Same shape as a main loop followed by a remainder ("epilogue") loop, but the
; two edges into %epilogue.preheader carry different bounds: the edge from
; %entry is taken when %count is not ugt 9 (so at most 9), while the edge from
; %while.loopexit is taken when %sub is not ugt 7 (so at most 7). The looser of
; the two bounds, 9, is the one that holds for the merged %count.epilogue phi,
; which matches the expected constant max backedge-taken count of 8 for
; %epilogue below.
define void @epilogue2(i64 %count) {
257+
; CHECK-LABEL: 'epilogue2'
258+
; CHECK-NEXT: Determining loop execution counts for: @epilogue2
259+
; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue)
260+
; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 8
261+
; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue)
262+
; CHECK-NEXT: Loop %epilogue: Trip multiple is 1
263+
; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8)
264+
; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951
265+
; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8)
266+
; CHECK-NEXT: Loop %while.body: Trip multiple is 1
267+
entry:
268+
; Entry guard uses 9 here, unlike the in-loop exit test which uses 7.
%cmp = icmp ugt i64 %count, 9
269+
br i1 %cmp, label %while.body, label %epilogue.preheader
270+
271+
while.body:
272+
; Main loop: consumes 8 per iteration and repeats while more than 7 remain.
%iv = phi i64 [ %sub, %while.body ], [ %count, %entry ]
273+
%sub = add i64 %iv, -8
274+
%exitcond.not = icmp ugt i64 %sub, 7
275+
br i1 %exitcond.not, label %while.body, label %while.loopexit
276+
277+
while.loopexit:
278+
%sub.exit = phi i64 [ %sub, %while.body ]
279+
br label %epilogue.preheader
280+
281+
epilogue.preheader:
282+
; Two-predecessor join: the remaining count comes from either incoming edge.
%count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ]
283+
%epilogue.cmp = icmp eq i64 %count.epilogue, 0
284+
br i1 %epilogue.cmp, label %exit, label %epilogue
285+
286+
epilogue:
287+
; Remainder loop: counts the leftover down by 1 until it reaches zero.
%iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ]
288+
%dec = add i64 %iv.epilogue, -1
289+
%exitcond.epilogue = icmp eq i64 %dec, 0
290+
br i1 %exitcond.epilogue, label %exit, label %epilogue
291+
292+
exit:
293+
ret void
294+
295+
}

llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ define void @apply_delta(ptr nocapture noundef %dst, ptr nocapture noundef reado
4141
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1
4242
; CHECK-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1
4343
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0
44-
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP0:![0-9]+]]
44+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]]
4545
; CHECK: while.end9:
4646
; CHECK-NEXT: ret void
4747
;

0 commit comments

Comments
 (0)