-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[SCEV] Rewrite more SCEVAddExpr when applying guards. #159942
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
74447bb
to
237861f
Compare
237861f
to
fd7152b
Compare
fd7152b
to
8d4a5e6
Compare
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesWhen re-writing SCEVAddExprs to apply information from guards, check if When we have an expression of the form (Const + A), check if we have Rewriting expressions could cause regressions, e.g. when comparing 2 Depends on #160012 (included in Proofs for some of the test changes: https://alive2.llvm.org/ce/z/RPX6t_ Full diff: https://github.com/llvm/llvm-project/pull/159942.diff 7 Files Affected:
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b08399b381f34..09a31b105e128 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15850,12 +15850,17 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
To = SE.getUMaxExpr(FromRewritten, RHS);
if (auto *UMin = dyn_cast<SCEVUMinExpr>(FromRewritten))
EnqueueOperands(UMin);
+ if (RHS->isOne())
+ ExprsToRewrite.push_back(From);
break;
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SGE:
To = SE.getSMaxExpr(FromRewritten, RHS);
- if (auto *SMin = dyn_cast<SCEVSMinExpr>(FromRewritten))
+ if (auto *SMin = dyn_cast<SCEVSMinExpr>(FromRewritten)) {
EnqueueOperands(SMin);
+ }
+ if (RHS->isOne())
+ ExprsToRewrite.push_back(From);
break;
case CmpInst::ICMP_EQ:
if (isa<SCEVConstant>(RHS))
@@ -15986,7 +15991,22 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
for (const SCEV *Expr : ExprsToRewrite) {
const SCEV *RewriteTo = Guards.RewriteMap[Expr];
Guards.RewriteMap.erase(Expr);
- Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)});
+ const SCEV *Rewritten = Guards.rewrite(RewriteTo);
+
+ // Try to strengthen divisibility of SMax/UMax expressions coming from >=
+ // 1 conditions.
+ if (auto *SMax = dyn_cast<SCEVSMaxExpr>(Rewritten)) {
+ unsigned MinTrailingZeros = SE.getMinTrailingZeros(SMax->getOperand(1));
+ for (const SCEV *Op : drop_begin(SMax->operands(), 2))
+ MinTrailingZeros =
+ std::min(MinTrailingZeros, SE.getMinTrailingZeros(Op));
+ if (MinTrailingZeros != 0)
+ Rewritten = SE.getSMaxExpr(
+ SE.getConstant(APInt(SMax->getType()->getScalarSizeInBits(), 1)
+ .shl(MinTrailingZeros)),
+ SMax);
+ }
+ Guards.RewriteMap.insert({Expr, Rewritten});
}
}
}
@@ -16059,16 +16079,32 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
- // Trip count expressions sometimes consist of adding 3 operands, i.e.
- // (Const + A + B). There may be guard info for A + B, and if so, apply
- // it.
- // TODO: Could more generally apply guards to Add sub-expressions.
- if (isa<SCEVConstant>(Expr->getOperand(0)) &&
- Expr->getNumOperands() == 3) {
- if (const SCEV *S = Map.lookup(
- SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
- return SE.getAddExpr(Expr->getOperand(0), S);
+ if (const SCEV *S = Map.lookup(Expr))
+ return S;
+ if (isa<SCEVConstant>(Expr->getOperand(0))) {
+ // Trip count expressions sometimes consist of adding 3 operands, i.e.
+ // (Const + A + B). There may be guard info for A + B, and if so, apply
+ // it.
+ // TODO: Could more generally apply guards to Add sub-expressions.
+ if (Expr->getNumOperands() == 3) {
+ if (const SCEV *S = Map.lookup(
+ SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+ return SE.getAddExpr(Expr->getOperand(0), S);
+ }
+
+ // For expressions of the form (Const + A), check if we have guard info
+ // for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
+ // sure we don't loose information when rewriting expressions based on
+ // back-edge taken counts in some cases..
+ if (Expr->getNumOperands() == 2) {
+ auto *NewC =
+ SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
+ if (const SCEV *S =
+ Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
+ return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
+ }
}
+
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
for (const auto *Op : Expr->operands()) {
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
index 6b2c78cebc44a..5ea836d3b8067 100644
--- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
@@ -33,9 +33,9 @@ declare void @clobber()
define void @test_add_sub_1_guard(ptr %src, i32 %n) {
; CHECK-LABEL: 'test_add_sub_1_guard'
; CHECK-NEXT: Determining loop execution counts for: @test_add_sub_1_guard
-; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
-; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
+; CHECK-NEXT: Loop %loop: backedge-taken count is i64 0
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 0
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 0
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
index 8d091a00ed4b9..2f0627b7d4476 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
@@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void umin(unsigned a, unsigned b) {
; a *= 2;
@@ -102,12 +102,12 @@ define void @umax(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a) umax (4 * %b)) U: [0,-1) S: [-2147483648,2147483647)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @umax
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
@@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void smin(signed a, signed b) {
; a *= 2;
@@ -197,12 +197,12 @@ define void @smax(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a)<nsw> smax (4 * %b)<nsw>) U: [0,-1) S: [-2147483648,2147483647)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @smax
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
index 4b52479fc6c4d..40e3c63cbe04a 100644
--- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
@@ -343,14 +343,13 @@ define void @slt_no_smax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
@@ -385,14 +384,13 @@ define void @ult_no_umax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
index bc1543d8361a7..09419c13aaeb0 100644
--- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
+++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
@@ -61,9 +61,9 @@ define void @test_memset_size_can_use_info_from_guards(i32 %x, ptr %dst) {
; CHECK: [[LOOP1_BACKEDGE]]:
; CHECK-NEXT: br label %[[LOOP1]]
; CHECK: [[LOOP2_PREHEADER]]:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SHR]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[UMAX:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[UMAX]], i1 false)
; CHECK-NEXT: br label %[[LOOP2:.*]]
; CHECK: [[LOOP2]]:
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 4f5a26e9c89cb..4a9b2bd7cc888 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -193,7 +193,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
index 648ebc7e6c3a5..a556b15adbefc 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
@@ -465,12 +465,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
-; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
-; CHECK: [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
-; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
-; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
-; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
+; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
;
entry:
%offset = sub i32 %x, %y
|
@llvm/pr-subscribers-llvm-analysis Author: Florian Hahn (fhahn) ChangesWhen re-writing SCEVAddExprs to apply information from guards, check if When we have an expression of the form (Const + A), check if we have Rewriting expressions could cause regressions, e.g. when comparing 2 Depends on #160012 (included in Proofs for some of the test changes: https://alive2.llvm.org/ce/z/RPX6t_ Full diff: https://github.com/llvm/llvm-project/pull/159942.diff 7 Files Affected:
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b08399b381f34..09a31b105e128 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15850,12 +15850,17 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
To = SE.getUMaxExpr(FromRewritten, RHS);
if (auto *UMin = dyn_cast<SCEVUMinExpr>(FromRewritten))
EnqueueOperands(UMin);
+ if (RHS->isOne())
+ ExprsToRewrite.push_back(From);
break;
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SGE:
To = SE.getSMaxExpr(FromRewritten, RHS);
- if (auto *SMin = dyn_cast<SCEVSMinExpr>(FromRewritten))
+ if (auto *SMin = dyn_cast<SCEVSMinExpr>(FromRewritten)) {
EnqueueOperands(SMin);
+ }
+ if (RHS->isOne())
+ ExprsToRewrite.push_back(From);
break;
case CmpInst::ICMP_EQ:
if (isa<SCEVConstant>(RHS))
@@ -15986,7 +15991,22 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
for (const SCEV *Expr : ExprsToRewrite) {
const SCEV *RewriteTo = Guards.RewriteMap[Expr];
Guards.RewriteMap.erase(Expr);
- Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)});
+ const SCEV *Rewritten = Guards.rewrite(RewriteTo);
+
+ // Try to strengthen divisibility of SMax/UMax expressions coming from >=
+ // 1 conditions.
+ if (auto *SMax = dyn_cast<SCEVSMaxExpr>(Rewritten)) {
+ unsigned MinTrailingZeros = SE.getMinTrailingZeros(SMax->getOperand(1));
+ for (const SCEV *Op : drop_begin(SMax->operands(), 2))
+ MinTrailingZeros =
+ std::min(MinTrailingZeros, SE.getMinTrailingZeros(Op));
+ if (MinTrailingZeros != 0)
+ Rewritten = SE.getSMaxExpr(
+ SE.getConstant(APInt(SMax->getType()->getScalarSizeInBits(), 1)
+ .shl(MinTrailingZeros)),
+ SMax);
+ }
+ Guards.RewriteMap.insert({Expr, Rewritten});
}
}
}
@@ -16059,16 +16079,32 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
- // Trip count expressions sometimes consist of adding 3 operands, i.e.
- // (Const + A + B). There may be guard info for A + B, and if so, apply
- // it.
- // TODO: Could more generally apply guards to Add sub-expressions.
- if (isa<SCEVConstant>(Expr->getOperand(0)) &&
- Expr->getNumOperands() == 3) {
- if (const SCEV *S = Map.lookup(
- SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
- return SE.getAddExpr(Expr->getOperand(0), S);
+ if (const SCEV *S = Map.lookup(Expr))
+ return S;
+ if (isa<SCEVConstant>(Expr->getOperand(0))) {
+ // Trip count expressions sometimes consist of adding 3 operands, i.e.
+ // (Const + A + B). There may be guard info for A + B, and if so, apply
+ // it.
+ // TODO: Could more generally apply guards to Add sub-expressions.
+ if (Expr->getNumOperands() == 3) {
+ if (const SCEV *S = Map.lookup(
+ SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+ return SE.getAddExpr(Expr->getOperand(0), S);
+ }
+
+ // For expressions of the form (Const + A), check if we have guard info
+ // for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
+ // sure we don't loose information when rewriting expressions based on
+ // back-edge taken counts in some cases..
+ if (Expr->getNumOperands() == 2) {
+ auto *NewC =
+ SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
+ if (const SCEV *S =
+ Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
+ return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
+ }
}
+
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
for (const auto *Op : Expr->operands()) {
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
index 6b2c78cebc44a..5ea836d3b8067 100644
--- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
@@ -33,9 +33,9 @@ declare void @clobber()
define void @test_add_sub_1_guard(ptr %src, i32 %n) {
; CHECK-LABEL: 'test_add_sub_1_guard'
; CHECK-NEXT: Determining loop execution counts for: @test_add_sub_1_guard
-; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
-; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
+; CHECK-NEXT: Loop %loop: backedge-taken count is i64 0
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 0
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 0
; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
index 8d091a00ed4b9..2f0627b7d4476 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
@@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void umin(unsigned a, unsigned b) {
; a *= 2;
@@ -102,12 +102,12 @@ define void @umax(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a) umax (4 * %b)) U: [0,-1) S: [-2147483648,2147483647)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @umax
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
@@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: Trip multiple is 1
+; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
; void smin(signed a, signed b) {
; a *= 2;
@@ -197,12 +197,12 @@ define void @smax(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a)<nsw> smax (4 * %b)<nsw>) U: [0,-1) S: [-2147483648,2147483647)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @smax
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
;
diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
index 4b52479fc6c4d..40e3c63cbe04a 100644
--- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
@@ -343,14 +343,13 @@ define void @slt_no_smax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
@@ -385,14 +384,13 @@ define void @ult_no_umax_needed(i64 %n, ptr %dst) {
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
index bc1543d8361a7..09419c13aaeb0 100644
--- a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
+++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll
@@ -61,9 +61,9 @@ define void @test_memset_size_can_use_info_from_guards(i32 %x, ptr %dst) {
; CHECK: [[LOOP1_BACKEDGE]]:
; CHECK-NEXT: br label %[[LOOP1]]
; CHECK: [[LOOP2_PREHEADER]]:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SHR]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[UMAX:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[UMAX]], i1 false)
; CHECK-NEXT: br label %[[LOOP2:.*]]
; CHECK: [[LOOP2]]:
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 4f5a26e9c89cb..4a9b2bd7cc888 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -193,7 +193,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
index 648ebc7e6c3a5..a556b15adbefc 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
@@ -465,12 +465,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
-; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
-; CHECK: [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
-; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
-; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
-; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
+; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
;
entry:
%offset = sub i32 %x, %y
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This depends on #160012 to prevent a regression where we lose divisibility info by the rewrite
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SHR]], -1 | ||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 | ||
; CHECK-NEXT: [[UMAX:%.*]] = add nuw nsw i64 [[TMP1]], 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is missing a fold end-to-end from https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2845/files/ed058c0c689f0dadf267033c74e0960c3593baff#r2367574398
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b, if neither are constant. This improves results in some cases, and will be even more useful together with * llvm#160012 * llvm#159942 https://alive2.llvm.org/ce/z/YyBvoT
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b, if neither are constant. This improves results in some cases, and will be even more useful together with * llvm#160012 * llvm#159942 https://alive2.llvm.org/ce/z/YyBvoT
Can you please add some more detail on why this specific pattern is relevant? Based on tests it clearly is, I'm just not super clear on why. |
SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType())); | ||
if (const SCEV *S = | ||
Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1)))) | ||
return SE.getMinusSCEV(S, SE.getOne(Expr->getType())); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can directly use add with -1 here and avoid an extra canonicalization step.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b, if neither are constant. This improves results in some cases, and will be even more useful together with * llvm#160012 * llvm#159942 https://alive2.llvm.org/ce/z/YyBvoT
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b, if neither are constant. This improves results in some cases, and will be even more useful together with * llvm#160012 * llvm#159942 https://alive2.llvm.org/ce/z/YyBvoT
8d4a5e6
to
5180e68
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add some more detail on why this specific pattern is relevant? Based on tests it clearly is, I'm just not super clear on why.
The general case where this can help is if we have a guard for an SCEVAddExpr (e.g., x - y > 0). Previously we would only re-write the operands of SCEVAddExprs, and would not apply the guard info.
The logic with (Const + 1 + A) -> ((Const + 1 + A) - 1)
is needed to avoid regressions when comparing expressions with guards applied (like BTC/exit count expressions or the value at the last iteration vs the value at the 2nd-to-last iteration). With the current changes, we may apply info to only one of those expressions, which can lead to pessimizations. One example is if we have (n - 2 != n - 1) and we have {n - 1: umin(1, n-1)}, then we would only rewrite (n - 1) -> umin(1, n-1) and fail to simplify the compare to true.
// For expressions of the form (Const + A), check if we have guard info | ||
// for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes | ||
// sure we don't lose information when rewriting expressions based on | ||
// back-edge taken counts in some cases.. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// back-edge taken counts in some cases.. | |
// back-edge taken counts in some cases. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-count=2 | FileCheck %s | ||
|
||
; Make sure the loop is unrolled without a remainder loop based on an assumption | ||
; that the least significant bit is known to be zero. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aren't we failing the intent of this test now? We now produce an epilogue loop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The changes here actually were a regression, as we failed to determine the expected multiple of the trip count from the guards. To fix that, I think we need to improve the divisibility handling in general: #163021 (included in this PR now)
At the moment, the effectiveness of guards that contain divisibility information (A % B == 0 ) depends on the order of the conditions. This patch makes using divisibility information independent of the order, by collecting and applying the divisibility information separately. We first collect all conditions in a vector, then collect the divisibility information from all guards. When processing other guards, we apply divisibility info collected earlier. After all guards have been processed, we add the divisibility info, rewriting the existing rewrite. This ensures we apply the divisibility info to the largest rewrite expression. This helps to improve results in a few cases, one in dtcxzyw/llvm-opt-benchmark#2921 and another one in a different large C/C++ based IR corpus.
When re-writing SCEVAddExprs to apply information from guards, check if we have information for the expression itself. If so, apply it. When we have an expression of the form (Const + A), check if we have guard info for (Const + 1 + A) and use it. This is needed to avoid regressions in a few cases, where we have BTCs with a subtracted constant. Rewriting expressions could cause regressions, e.g. when comparing 2 SCEV expressions where we are only able to rewrite one side, but I could not find any cases where this happens more with this patch in practice. Depends on llvm#160012 (included in PR) Proofs for some of the test changes: https://alive2.llvm.org/ce/z/RPX6t_
5180e68
to
0710344
Compare
You can test this locally with the following command: git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/include/llvm/Analysis/ScalarEvolution.h llvm/lib/Analysis/ScalarEvolution.cpp
View the diff from clang-format here.diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 47f0650e3..5db099063 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15775,33 +15775,33 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
// predicate.
const SCEV *One = SE.getOne(RHS->getType());
switch (Predicate) {
- case CmpInst::ICMP_ULT:
- if (RHS->getType()->isPointerTy())
- return;
- RHS = SE.getUMaxExpr(RHS, One);
- [[fallthrough]];
- case CmpInst::ICMP_SLT: {
- RHS = SE.getMinusSCEV(RHS, One);
- RHS = DividesBy ? getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE)
- : RHS;
- break;
- }
- case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_SGT:
- RHS = SE.getAddExpr(RHS, One);
- RHS = DividesBy ? getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE) : RHS;
- break;
- case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_SLE:
- RHS = DividesBy ? getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE)
- : RHS;
- break;
- case CmpInst::ICMP_UGE:
- case CmpInst::ICMP_SGE:
- RHS = DividesBy ? getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE) : RHS;
- break;
- default:
- break;
+ case CmpInst::ICMP_ULT:
+ if (RHS->getType()->isPointerTy())
+ return;
+ RHS = SE.getUMaxExpr(RHS, One);
+ [[fallthrough]];
+ case CmpInst::ICMP_SLT: {
+ RHS = SE.getMinusSCEV(RHS, One);
+ RHS = DividesBy ? getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE)
+ : RHS;
+ break;
+ }
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ RHS = SE.getAddExpr(RHS, One);
+ RHS = DividesBy ? getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE) : RHS;
+ break;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ RHS = DividesBy ? getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE)
+ : RHS;
+ break;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ RHS = DividesBy ? getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE) : RHS;
+ break;
+ default:
+ break;
}
SmallVector<const SCEV *, 16> Worklist(1, LHS);
|
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b, if neither are constant. This improves results in some cases, and will be even more useful together with * llvm#160012 * llvm#159942 https://alive2.llvm.org/ce/z/YyBvoT
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b, if neither are constant. This improves results in some cases, and will be even more useful together with * llvm#160012 * llvm#159942 https://alive2.llvm.org/ce/z/YyBvoT
When collecting information from loop guards, use UMax(1, %b - %a) for ICMP NE %a, %b, if neither are constant. This improves results in some cases, and will be even more useful together with * llvm#160012 * llvm#159942 https://alive2.llvm.org/ce/z/YyBvoT
When re-writing SCEVAddExprs to apply information from guards, check if
we have information for the expression itself. If so, apply it.
When we have an expression of the form (Const + A), check if we have
guard info for (Const + 1 + A) and use it. This is needed to avoid
regressions in a few cases, where we have BTCs with a subtracted
constant.
Rewriting expressions could cause regressions, e.g. when comparing 2
SCEV expressions where we are only able to rewrite one side, but I could
not find any cases where this happens more with this patch in practice.
Depends on #160012 (included in
PR)
Proofs for some of the test changes: https://alive2.llvm.org/ce/z/RPX6t_