Skip to content

Commit a5d3522

Browse files
authored
[SCEV] Rewrite A - B = UMax(1, A - B) lazily for A != B loop guards. (#163787)
Follow-up to 2d02726 (llvm/llvm-project#160500).
Creating the SCEV subtraction eagerly is very expensive. To soften the blow, just collect a set of inequalities and check whether the subtraction rewrite can be applied when rewriting a SCEVAddExpr.
This recovers most of the compile-time regression: http://llvm-compile-time-tracker.com/compare.php?from=0792478e4e133be96650444f3264e89d002fc058&to=7fca35db60fe6f423ea6051b45226046c067c252&stat=instructions:u
stage1-O3: -0.10% stage1-ReleaseThinLTO: -0.09% stage1-ReleaseLTO-g: -0.10% stage1-O0-g: +0.02% stage1-aarch64-O3: -0.09% stage1-aarch64-O0-g: +0.00% stage2-O3: -0.17% stage2-O0-g: -0.05% stage2-clang: -0.07%
There is still some negative impact compared to before 2d02726, but there is probably not much more we could do to reduce it further.
Compile-time improvement with 2d02726 reverted on top of the current PR: http://llvm-compile-time-tracker.com/compare.php?from=7fca35db60fe6f423ea6051b45226046c067c252&to=98dd152bdfc76b30d00190d3850d89406ca3c21f&stat=instructions:u
stage1-O3: 60628M (-0.03%) stage1-ReleaseThinLTO: 76388M (-0.04%) stage1-ReleaseLTO-g: 89228M (-0.02%) stage1-O0-g: 18523M (-0.03%) stage1-aarch64-O3: 67623M (-0.03%) stage1-aarch64-O0-g: 22595M (+0.01%) stage2-O3: 52336M (+0.01%) stage2-O0-g: 16174M (+0.00%) stage2-clang: 34890032M (-0.03%)
PR: llvm/llvm-project#163787
1 parent c491c6e commit a5d3522

File tree

3 files changed

+54
-15
lines changed

3 files changed

+54
-15
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,6 +1345,7 @@ class ScalarEvolution {
13451345

13461346
class LoopGuards {
13471347
DenseMap<const SCEV *, const SCEV *> RewriteMap;
1348+
SmallDenseSet<std::pair<const SCEV *, const SCEV *>> NotEqual;
13481349
bool PreserveNUW = false;
13491350
bool PreserveNSW = false;
13501351
ScalarEvolution &SE;

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15740,19 +15740,26 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1574015740
GetNextSCEVDividesByDivisor(One, DividesBy);
1574115741
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
1574215742
} else {
15743+
// LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
15744+
// but creating the subtraction eagerly is expensive. Track the
15745+
// inequalities in a separate set, and materialize the rewrite lazily
15746+
// when encountering a suitable subtraction while re-writing.
1574315747
if (LHS->getType()->isPointerTy()) {
1574415748
LHS = SE.getLosslessPtrToIntExpr(LHS);
1574515749
RHS = SE.getLosslessPtrToIntExpr(RHS);
1574615750
if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
1574715751
break;
1574815752
}
15749-
auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
15750-
const SCEV *Sub = SE.getMinusSCEV(A, B);
15751-
AddRewrite(Sub, Sub,
15752-
SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
15753-
};
15754-
AddSubRewrite(LHS, RHS);
15755-
AddSubRewrite(RHS, LHS);
15753+
const SCEVConstant *C;
15754+
const SCEV *A, *B;
15755+
if (match(RHS, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
15756+
match(LHS, m_scev_Add(m_scev_Specific(C), m_SCEV(B)))) {
15757+
RHS = A;
15758+
LHS = B;
15759+
}
15760+
if (LHS > RHS)
15761+
std::swap(LHS, RHS);
15762+
Guards.NotEqual.insert({LHS, RHS});
1575615763
continue;
1575715764
}
1575815765
break;
@@ -15886,13 +15893,15 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
1588615893
class SCEVLoopGuardRewriter
1588715894
: public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
1588815895
const DenseMap<const SCEV *, const SCEV *> &Map;
15896+
const SmallDenseSet<std::pair<const SCEV *, const SCEV *>> &NotEqual;
1588915897

1589015898
SCEV::NoWrapFlags FlagMask = SCEV::FlagAnyWrap;
1589115899

1589215900
public:
1589315901
SCEVLoopGuardRewriter(ScalarEvolution &SE,
1589415902
const ScalarEvolution::LoopGuards &Guards)
15895-
: SCEVRewriteVisitor(SE), Map(Guards.RewriteMap) {
15903+
: SCEVRewriteVisitor(SE), Map(Guards.RewriteMap),
15904+
NotEqual(Guards.NotEqual) {
1589615905
if (Guards.PreserveNUW)
1589715906
FlagMask = ScalarEvolution::setFlags(FlagMask, SCEV::FlagNUW);
1589815907
if (Guards.PreserveNSW)
@@ -15947,14 +15956,36 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
1594715956
}
1594815957

1594915958
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
15959+
// Helper to check if S is a subtraction (A - B) where A != B, and if so,
15960+
// return UMax(S, 1).
15961+
auto RewriteSubtraction = [&](const SCEV *S) -> const SCEV * {
15962+
const SCEV *LHS, *RHS;
15963+
if (MatchBinarySub(S, LHS, RHS)) {
15964+
if (LHS > RHS)
15965+
std::swap(LHS, RHS);
15966+
if (NotEqual.contains({LHS, RHS}))
15967+
return SE.getUMaxExpr(S, SE.getOne(S->getType()));
15968+
}
15969+
return nullptr;
15970+
};
15971+
15972+
// Check if Expr itself is a subtraction pattern with guard info.
15973+
if (const SCEV *Rewritten = RewriteSubtraction(Expr))
15974+
return Rewritten;
15975+
1595015976
// Trip count expressions sometimes consist of adding 3 operands, i.e.
1595115977
// (Const + A + B). There may be guard info for A + B, and if so, apply
1595215978
// it.
1595315979
// TODO: Could more generally apply guards to Add sub-expressions.
1595415980
if (isa<SCEVConstant>(Expr->getOperand(0)) &&
1595515981
Expr->getNumOperands() == 3) {
15956-
if (const SCEV *S = Map.lookup(
15957-
SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
15982+
const SCEV *Add =
15983+
SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2));
15984+
if (const SCEV *Rewritten = RewriteSubtraction(Add))
15985+
return SE.getAddExpr(
15986+
Expr->getOperand(0), Rewritten,
15987+
ScalarEvolution::maskFlags(Expr->getNoWrapFlags(), FlagMask));
15988+
if (const SCEV *S = Map.lookup(Add))
1595815989
return SE.getAddExpr(Expr->getOperand(0), S);
1595915990
}
1596015991
SmallVector<const SCEV *, 2> Operands;
@@ -15989,7 +16020,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
1598916020
}
1599016021
};
1599116022

15992-
if (RewriteMap.empty())
16023+
if (RewriteMap.empty() && NotEqual.empty())
1599316024
return Expr;
1599416025

1599516026
SCEVLoopGuardRewriter Rewriter(SE, *this);

llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,15 +111,14 @@ define void @test_sub_cmp(ptr align 8 %start, ptr %end) {
111111
; N32-NEXT: [[CMP_ENTRY:%.*]] = icmp eq ptr [[START]], [[END]]
112112
; N32-NEXT: br i1 [[CMP_ENTRY]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
113113
; N32: [[LOOP_HEADER_PREHEADER]]:
114-
; N32-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[PTR_DIFF]], i64 1)
115114
; N32-NEXT: br label %[[LOOP_HEADER:.*]]
116115
; N32: [[LOOP_HEADER]]:
117116
; N32-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ]
118117
; N32-NEXT: [[C_1:%.*]] = call i1 @cond()
119118
; N32-NEXT: br i1 [[C_1]], label %[[EXIT_EARLY:.*]], label %[[LOOP_LATCH]]
120119
; N32: [[LOOP_LATCH]]:
121120
; N32-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
122-
; N32-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]]
121+
; N32-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[PTR_DIFF]]
123122
; N32-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT_LOOPEXIT:.*]]
124123
; N32: [[EXIT_EARLY]]:
125124
; N32-NEXT: br label %[[EXIT]]
@@ -162,13 +161,17 @@ define void @test_ptr_diff_with_assume(ptr align 8 %start, ptr align 8 %end, ptr
162161
; CHECK-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]]
163162
; CHECK-NEXT: [[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2
164163
; CHECK-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]])
164+
; CHECK-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]]
165165
; CHECK-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]]
166166
; CHECK-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]]
167167
; CHECK: [[LOOP_BODY_PREHEADER]]:
168168
; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
169169
; CHECK: [[LOOP_BODY]]:
170+
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ]
170171
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond()
171-
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
172+
; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1
173+
; CHECK-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]]
174+
; CHECK-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
172175
; CHECK: [[EXIT_LOOPEXIT]]:
173176
; CHECK-NEXT: br label %[[EXIT]]
174177
; CHECK: [[EXIT]]:
@@ -182,13 +185,17 @@ define void @test_ptr_diff_with_assume(ptr align 8 %start, ptr align 8 %end, ptr
182185
; N32-NEXT: [[PTR_DIFF:%.*]] = sub i64 [[START_INT]], [[END_INT]]
183186
; N32-NEXT: [[DIFF_CMP:%.*]] = icmp ult i64 [[PTR_DIFF]], 2
184187
; N32-NEXT: call void @llvm.assume(i1 [[DIFF_CMP]])
188+
; N32-NEXT: [[COMPUTED_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[PTR_DIFF]]
185189
; N32-NEXT: [[ENTRY_CMP:%.*]] = icmp eq ptr [[START]], [[END]]
186190
; N32-NEXT: br i1 [[ENTRY_CMP]], label %[[EXIT:.*]], label %[[LOOP_BODY_PREHEADER:.*]]
187191
; N32: [[LOOP_BODY_PREHEADER]]:
188192
; N32-NEXT: br label %[[LOOP_BODY:.*]]
189193
; N32: [[LOOP_BODY]]:
194+
; N32-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ], [ [[START]], %[[LOOP_BODY_PREHEADER]] ]
190195
; N32-NEXT: [[TMP0:%.*]] = call i1 @cond()
191-
; N32-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
196+
; N32-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 1
197+
; N32-NEXT: [[LOOP_CMP:%.*]] = icmp eq ptr [[IV_NEXT]], [[COMPUTED_END]]
198+
; N32-NEXT: br i1 [[LOOP_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_BODY]]
192199
; N32: [[EXIT_LOOPEXIT]]:
193200
; N32-NEXT: br label %[[EXIT]]
194201
; N32: [[EXIT]]:

0 commit comments

Comments
 (0)