Skip to content

Commit f0df1e3

Browse files
committed
Revert "[LAA,Loads] Use loop guards and max BTC if needed when checking deref. (#155672)"
This reverts commit 08001cf. The reverted change triggers an assertion failure in some build configurations, e.g. https://lab.llvm.org/buildbot/#/builders/24/builds/12211
1 parent 2429a8f commit f0df1e3

File tree

5 files changed

+65
-70
lines changed

5 files changed

+65
-70
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -183,12 +183,10 @@ class MemoryDepChecker {
183183
MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
184184
DominatorTree *DT, const Loop *L,
185185
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
186-
unsigned MaxTargetVectorWidthInBits,
187-
std::optional<ScalarEvolution::LoopGuards> &LoopGuards)
186+
unsigned MaxTargetVectorWidthInBits)
188187
: PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
189188
SymbolicStrides(SymbolicStrides),
190-
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits),
191-
LoopGuards(LoopGuards) {}
189+
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
192190

193191
/// Register the location (instructions are given increasing numbers)
194192
/// of a write access.
@@ -375,7 +373,7 @@ class MemoryDepChecker {
375373
PointerBounds;
376374

377375
/// Cache for the loop guards of InnermostLoop.
378-
std::optional<ScalarEvolution::LoopGuards> &LoopGuards;
376+
std::optional<ScalarEvolution::LoopGuards> LoopGuards;
379377

380378
/// Check whether there is a plausible dependence between the two
381379
/// accesses.
@@ -533,9 +531,8 @@ class RuntimePointerChecking {
533531
AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
534532
};
535533

536-
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE,
537-
std::optional<ScalarEvolution::LoopGuards> &LoopGuards)
538-
: DC(DC), SE(SE), LoopGuards(LoopGuards) {}
534+
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
535+
: DC(DC), SE(SE) {}
539536

540537
/// Reset the state of the pointer runtime information.
541538
void reset() {
@@ -649,9 +646,6 @@ class RuntimePointerChecking {
649646
/// Holds a pointer to the ScalarEvolution analysis.
650647
ScalarEvolution *SE;
651648

652-
/// Cache for the loop guards of the loop.
653-
std::optional<ScalarEvolution::LoopGuards> &LoopGuards;
654-
655649
/// Set of run-time checks required to establish independence of
656650
/// otherwise may-aliasing pointers in the loop.
657651
SmallVector<RuntimePointerCheck, 4> Checks;
@@ -827,9 +821,6 @@ class LoopAccessInfo {
827821

828822
Loop *TheLoop;
829823

830-
/// Cache for the loop guards of TheLoop.
831-
std::optional<ScalarEvolution::LoopGuards> LoopGuards;
832-
833824
/// Determines whether we should generate partial runtime checks when not all
834825
/// memory accesses could be analyzed.
835826
bool AllowPartial;
@@ -947,8 +938,7 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
947938
const SCEV *MaxBTC, ScalarEvolution *SE,
948939
DenseMap<std::pair<const SCEV *, Type *>,
949940
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
950-
DominatorTree *DT, AssumptionCache *AC,
951-
std::optional<ScalarEvolution::LoopGuards> &LoopGuards);
941+
DominatorTree *DT, AssumptionCache *AC);
952942

953943
class LoopAccessInfoManager {
954944
/// The cache.

llvm/lib/Analysis/Loads.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626

2727
using namespace llvm;
2828

29+
static cl::opt<bool>
30+
UseSymbolicMaxBTCForDerefInLoop("use-symbolic-maxbtc-deref-loop",
31+
cl::init(false));
32+
2933
static bool isAligned(const Value *Base, Align Alignment,
3034
const DataLayout &DL) {
3135
return Base->getPointerAlignment(DL) >= Alignment;
@@ -331,10 +335,18 @@ bool llvm::isDereferenceableAndAlignedInLoop(
331335
: SE.getBackedgeTakenCount(L);
332336
if (isa<SCEVCouldNotCompute>(MaxBECount))
333337
return false;
334-
std::optional<ScalarEvolution::LoopGuards> LoopGuards;
335-
const auto &[AccessStart, AccessEnd] =
336-
getStartAndEndForAccess(L, PtrScev, LI->getType(), BECount, MaxBECount,
337-
&SE, nullptr, &DT, AC, LoopGuards);
338+
339+
if (isa<SCEVCouldNotCompute>(BECount) && !UseSymbolicMaxBTCForDerefInLoop) {
340+
// TODO: Support symbolic max backedge taken counts for loops without
341+
// computable backedge taken counts.
342+
MaxBECount =
343+
Predicates
344+
? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
345+
: SE.getConstantMaxBackedgeTakenCount(L);
346+
}
347+
348+
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
349+
L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC);
338350
if (isa<SCEVCouldNotCompute>(AccessStart) ||
339351
isa<SCEVCouldNotCompute>(AccessEnd))
340352
return false;
@@ -343,13 +355,10 @@ bool llvm::isDereferenceableAndAlignedInLoop(
343355
const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
344356
if (isa<SCEVCouldNotCompute>(PtrDiff))
345357
return false;
346-
347-
if (!LoopGuards)
348-
LoopGuards.emplace(
349-
ScalarEvolution::LoopGuards::collect(AddRec->getLoop(), SE));
350-
358+
ScalarEvolution::LoopGuards LoopGuards =
359+
ScalarEvolution::LoopGuards::collect(AddRec->getLoop(), SE);
351360
APInt MaxPtrDiff =
352-
SE.getUnsignedRangeMax(SE.applyLoopGuards(PtrDiff, *LoopGuards));
361+
SE.getUnsignedRangeMax(SE.applyLoopGuards(PtrDiff, LoopGuards));
353362

354363
Value *Base = nullptr;
355364
APInt AccessSize;
@@ -395,7 +404,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(
395404
[&SE, AccessSizeSCEV, &LoopGuards](const RetainedKnowledge &RK) {
396405
return SE.isKnownPredicate(
397406
CmpInst::ICMP_ULE, AccessSizeSCEV,
398-
SE.applyLoopGuards(SE.getSCEV(RK.IRArgValue), *LoopGuards));
407+
SE.applyLoopGuards(SE.getSCEV(RK.IRArgValue), LoopGuards));
399408
},
400409
DL, HeaderFirstNonPHI, AC, &DT) ||
401410
isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -193,28 +193,30 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
193193
/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194194
/// return nullptr. \p A and \p B must have the same type.
195195
static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
196-
ScalarEvolution &SE) {
197-
if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
196+
ScalarEvolution &SE,
197+
const Instruction *CtxI) {
198+
if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B, CtxI))
198199
return nullptr;
199200
return SE.getAddExpr(A, B);
200201
}
201202

202203
/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
203204
/// return nullptr. \p A and \p B must have the same type.
204205
static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
205-
ScalarEvolution &SE) {
206-
if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
206+
ScalarEvolution &SE,
207+
const Instruction *CtxI) {
208+
if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B, CtxI))
207209
return nullptr;
208210
return SE.getMulExpr(A, B);
209211
}
210212

211213
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
212214
/// \p MaxBTC is guaranteed inbounds of the accessed object.
213-
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
214-
const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
215-
ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
216-
AssumptionCache *AC,
217-
std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
215+
static bool
216+
evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
217+
const SCEV *MaxBTC, const SCEV *EltSize,
218+
ScalarEvolution &SE, const DataLayout &DL,
219+
DominatorTree *DT, AssumptionCache *AC) {
218220
auto *PointerBase = SE.getPointerBase(AR->getStart());
219221
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
220222
if (!StartPtr)
@@ -232,11 +234,12 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
232234
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
233235
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
234236

237+
// Context which dominates the entire loop.
238+
auto *CtxI = L->getLoopPredecessor()->getTerminator();
235239
// Check if we have a suitable dereferenceable assumption we can use.
236240
if (!StartPtrV->canBeFreed()) {
237241
RetainedKnowledge DerefRK = getKnowledgeValidInContext(
238-
StartPtrV, {Attribute::Dereferenceable}, *AC,
239-
L->getLoopPredecessor()->getTerminator(), DT);
242+
StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
240243
if (DerefRK) {
241244
DerefBytesSCEV = SE.getUMaxExpr(
242245
DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
@@ -260,36 +263,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
260263
SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
261264

262265
const SCEV *OffsetAtLastIter =
263-
mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
264-
if (!OffsetAtLastIter) {
265-
// Re-try with constant max backedge-taken count if using the symbolic one
266-
// failed.
267-
MaxBTC = SE.getNoopOrZeroExtend(
268-
SE.getConstantMaxBackedgeTakenCount(AR->getLoop()), WiderTy);
269-
OffsetAtLastIter =
270-
mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
271-
if (!OffsetAtLastIter)
272-
return false;
273-
}
266+
mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE, CtxI);
267+
if (!OffsetAtLastIter)
268+
return false;
274269

275270
const SCEV *OffsetEndBytes = addSCEVNoOverflow(
276-
OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
271+
OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE, CtxI);
277272
if (!OffsetEndBytes)
278273
return false;
279274

280275
if (IsKnownNonNegative) {
281276
// For positive steps, check if
282277
// (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
283278
// while making sure none of the computations unsigned wrap themselves.
284-
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
279+
const SCEV *EndBytes =
280+
addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE, CtxI);
285281
if (!EndBytes)
286282
return false;
287-
288-
if (!LoopGuards)
289-
LoopGuards.emplace(
290-
ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
291-
292-
EndBytes = SE.applyLoopGuards(EndBytes, *LoopGuards);
293283
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
294284
}
295285

@@ -306,8 +296,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
306296
const SCEV *MaxBTC, ScalarEvolution *SE,
307297
DenseMap<std::pair<const SCEV *, Type *>,
308298
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
309-
DominatorTree *DT, AssumptionCache *AC,
310-
std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
299+
DominatorTree *DT, AssumptionCache *AC) {
311300
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
312301
if (PointerBounds) {
313302
auto [Iter, Ins] = PointerBounds->insert(
@@ -343,7 +332,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
343332
// separately checks that accesses cannot wrap, so unsigned max
344333
// represents an upper bound.
345334
if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
346-
DT, AC, LoopGuards)) {
335+
DT, AC)) {
347336
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
348337
} else {
349338
ScEnd = SE->getAddExpr(
@@ -392,7 +381,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
392381
const SCEV *BTC = PSE.getBackedgeTakenCount();
393382
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
394383
Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
395-
&DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
384+
&DC.getPointerBounds(), DC.getDT(), DC.getAC());
396385
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
397386
!isa<SCEVCouldNotCompute>(ScEnd) &&
398387
"must be able to compute both start and end expressions");
@@ -1998,13 +1987,13 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
19981987
ScalarEvolution &SE = *PSE.getSE();
19991988
const auto &[SrcStart_, SrcEnd_] =
20001989
getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
2001-
&SE, &PointerBounds, DT, AC, LoopGuards);
1990+
&SE, &PointerBounds, DT, AC);
20021991
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
20031992
return false;
20041993

20051994
const auto &[SinkStart_, SinkEnd_] =
20061995
getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
2007-
&SE, &PointerBounds, DT, AC, LoopGuards);
1996+
&SE, &PointerBounds, DT, AC);
20081997
if (isa<SCEVCouldNotCompute>(SinkStart_) ||
20091998
isa<SCEVCouldNotCompute>(SinkEnd_))
20101999
return false;
@@ -3051,9 +3040,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30513040
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
30523041

30533042
DepChecker = std::make_unique<MemoryDepChecker>(
3054-
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3055-
PtrRtChecking =
3056-
std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
3043+
*PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
3044+
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
30573045
if (canAnalyzeLoop())
30583046
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
30593047
}

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2338,15 +2338,23 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
23382338
// Can we use context to prove the fact we need?
23392339
if (!CtxI)
23402340
return false;
2341-
// TODO: Support mul.
2342-
if (BinOp == Instruction::Mul)
2343-
return false;
23442341
auto *RHSC = dyn_cast<SCEVConstant>(RHS);
23452342
// TODO: Lift this limitation.
23462343
if (!RHSC)
23472344
return false;
23482345
APInt C = RHSC->getAPInt();
23492346
unsigned NumBits = C.getBitWidth();
2347+
if (BinOp == Instruction::Mul) {
2348+
// Multiplying by 0 or 1 never overflows
2349+
if (C.isZero() || C.isOne())
2350+
return true;
2351+
if (Signed)
2352+
return false;
2353+
APInt Limit = APInt::getMaxValue(NumBits).udiv(C);
2354+
// To avoid overflow, we need to make sure that LHS <= MAX / C.
2355+
return isKnownPredicateAt(ICmpInst::ICMP_ULE, LHS, getConstant(Limit),
2356+
CtxI);
2357+
}
23502358
bool IsSub = (BinOp == Instruction::Sub);
23512359
bool IsNegativeConst = (Signed && C.isNegative());
23522360
// Compute the direction and magnitude by which we need to check overflow.

llvm/test/Transforms/LoopVectorize/vect.stats.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization --disable-output -stats -S 2>&1 | FileCheck %s
1+
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization -use-symbolic-maxbtc-deref-loop --disable-output -stats -S 2>&1 | FileCheck %s
22
; REQUIRES: asserts
33

44
; We have 3 loops, two of them are vectorizable (with one being early-exit

0 commit comments

Comments
 (0)