Skip to content

Commit f03b341

Browse files
committed
[SCEV] Apply loop guards to End in computeMaxBECountForLT
This is a follow-on from llvm#115705. Applying the loop guard allows us to calculate the maximum trip count in more places, which in turn allows isIndvarOverflowCheckKnownFalse to skip the overflow check.
1 parent d119d43 commit f03b341

File tree

3 files changed

+14
-18
lines changed

3 files changed

+14
-18
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2218,8 +2218,8 @@ class ScalarEvolution {
22182218
/// actually doesn't, or we'd have to immediately execute UB)
22192219
/// We *don't* assert these preconditions so please be careful.
22202220
const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride,
2221-
const SCEV *End, unsigned BitWidth,
2222-
bool IsSigned);
2221+
const SCEV *End, const Loop *L,
2222+
unsigned BitWidth, bool IsSigned);
22232223

22242224
/// Verify if a linear IV with positive stride can overflow when in a
22252225
/// less-than comparison, knowing the invariant term of the comparison,

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12857,11 +12857,10 @@ const SCEV *ScalarEvolution::getUDivCeilSCEV(const SCEV *N, const SCEV *D) {
1285712857
return getAddExpr(MinNOne, getUDivExpr(NMinusOne, D));
1285812858
}
1285912859

12860-
const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
12861-
const SCEV *Stride,
12862-
const SCEV *End,
12863-
unsigned BitWidth,
12864-
bool IsSigned) {
12860+
const SCEV *
12861+
ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride,
12862+
const SCEV *End, const Loop *L,
12863+
unsigned BitWidth, bool IsSigned) {
1286512864
// The logic in this function assumes we can represent a positive stride.
1286612865
// If we can't, the backedge-taken count must be zero.
1286712866
if (IsSigned && BitWidth == 1)
@@ -12895,8 +12894,10 @@ const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
1289512894
// the case End = RHS of the loop termination condition. This is safe because
1289612895
// in the other case (End - Start) is zero, leading to a zero maximum backedge
1289712896
// taken count.
12898-
APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)
12899-
: APIntOps::umin(getUnsignedRangeMax(End), Limit);
12897+
const SCEV *GuardedEnd = applyLoopGuards(End, L);
12898+
APInt MaxEnd = IsSigned
12899+
? APIntOps::smin(getSignedRangeMax(GuardedEnd), Limit)
12900+
: APIntOps::umin(getUnsignedRangeMax(GuardedEnd), Limit);
1290012901

1290112902
// MaxBECount = ceil((max(MaxEnd, MinStart) - MinStart) / Stride)
1290212903
MaxEnd = IsSigned ? APIntOps::smax(MaxEnd, MinStart)
@@ -13150,7 +13151,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
1315013151
// loop (RHS), and the fact that IV does not overflow (which is
1315113152
// checked above).
1315213153
const SCEV *MaxBECount = computeMaxBECountForLT(
13153-
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
13154+
Start, Stride, RHS, L, getTypeSizeInBits(LHS->getType()), IsSigned);
1315413155
return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
1315513156
MaxBECount, false /*MaxOrZero*/, Predicates);
1315613157
}
@@ -13334,7 +13335,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
1333413335
MaxOrZero = true;
1333513336
} else {
1333613337
ConstantMaxBECount = computeMaxBECountForLT(
13337-
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
13338+
Start, Stride, RHS, L, getTypeSizeInBits(LHS->getType()), IsSigned);
1333813339
}
1333913340

1334013341
if (isa<SCEVCouldNotCompute>(ConstantMaxBECount) &&

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
55
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s
66

7-
; TODO: We know the IV will never overflow here so we can skip the overflow
8-
; check
7+
; We know the IV will never overflow here so we can skip the overflow check
98

109
define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
1110
; CHECK-LABEL: define void @trip_count_max_1024(
@@ -15,11 +14,7 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
1514
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
1615
; CHECK: [[LOOP_PREHEADER]]:
1716
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TC]], i64 1)
18-
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
19-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
20-
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
21-
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
22-
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
17+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
2318
; CHECK: [[VECTOR_PH]]:
2419
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
2520
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2

0 commit comments

Comments
 (0)