@@ -193,30 +193,28 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
193
193
/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194
194
/// return nullptr. \p A and \p B must have the same type.
195
195
static const SCEV *addSCEVNoOverflow (const SCEV *A, const SCEV *B,
196
- ScalarEvolution &SE,
197
- const Instruction *CtxI) {
198
- if (!SE.willNotOverflow (Instruction::Add, /* IsSigned=*/ false , A, B, CtxI))
196
+ ScalarEvolution &SE) {
197
+ if (!SE.willNotOverflow (Instruction::Add, /* IsSigned=*/ false , A, B))
199
198
return nullptr ;
200
199
return SE.getAddExpr (A, B);
201
200
}
202
201
203
202
/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
204
203
/// return nullptr. \p A and \p B must have the same type.
205
204
static const SCEV *mulSCEVOverflow (const SCEV *A, const SCEV *B,
206
- ScalarEvolution &SE,
207
- const Instruction *CtxI) {
208
- if (!SE.willNotOverflow (Instruction::Mul, /* IsSigned=*/ false , A, B, CtxI))
205
+ ScalarEvolution &SE) {
206
+ if (!SE.willNotOverflow (Instruction::Mul, /* IsSigned=*/ false , A, B))
209
207
return nullptr ;
210
208
return SE.getMulExpr (A, B);
211
209
}
212
210
213
211
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
214
212
/// \p MaxBTC is guaranteed inbounds of the accessed object.
215
- static bool
216
- evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR,
217
- const SCEV *MaxBTC, const SCEV *EltSize ,
218
- ScalarEvolution &SE, const DataLayout &DL ,
219
- DominatorTree *DT, AssumptionCache *AC ) {
213
+ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap (
214
+ const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize ,
215
+ ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT ,
216
+ AssumptionCache *AC ,
217
+ std::optional<ScalarEvolution::LoopGuards> &LoopGuards ) {
220
218
auto *PointerBase = SE.getPointerBase (AR->getStart ());
221
219
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
222
220
if (!StartPtr)
@@ -234,12 +232,11 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
234
232
Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
235
233
const SCEV *DerefBytesSCEV = SE.getConstant (WiderTy, DerefBytes);
236
234
237
- // Context which dominates the entire loop.
238
- auto *CtxI = L->getLoopPredecessor ()->getTerminator ();
239
235
// Check if we have a suitable dereferenceable assumption we can use.
240
236
if (!StartPtrV->canBeFreed ()) {
241
237
RetainedKnowledge DerefRK = getKnowledgeValidInContext (
242
- StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
238
+ StartPtrV, {Attribute::Dereferenceable}, *AC,
239
+ L->getLoopPredecessor ()->getTerminator (), DT);
243
240
if (DerefRK) {
244
241
DerefBytesSCEV = SE.getUMaxExpr (
245
242
DerefBytesSCEV, SE.getConstant (WiderTy, DerefRK.ArgValue ));
@@ -263,23 +260,39 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
263
260
SE.getMinusSCEV (AR->getStart (), StartPtr), WiderTy);
264
261
265
262
const SCEV *OffsetAtLastIter =
266
- mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE, CtxI);
267
- if (!OffsetAtLastIter)
268
- return false ;
263
+ mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE);
264
+ if (!OffsetAtLastIter) {
265
+ // Re-try with constant max backedge-taken count if using the symbolic one
266
+ // failed.
267
+ MaxBTC = SE.getConstantMaxBackedgeTakenCount (AR->getLoop ());
268
+ if (isa<SCEVCouldNotCompute>(MaxBTC))
269
+ return false ;
270
+ MaxBTC = SE.getNoopOrZeroExtend (
271
+ MaxBTC, WiderTy);
272
+ OffsetAtLastIter =
273
+ mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE);
274
+ if (!OffsetAtLastIter)
275
+ return false ;
276
+ }
269
277
270
278
const SCEV *OffsetEndBytes = addSCEVNoOverflow (
271
- OffsetAtLastIter, SE.getNoopOrZeroExtend (EltSize, WiderTy), SE, CtxI );
279
+ OffsetAtLastIter, SE.getNoopOrZeroExtend (EltSize, WiderTy), SE);
272
280
if (!OffsetEndBytes)
273
281
return false ;
274
282
275
283
if (IsKnownNonNegative) {
276
284
// For positive steps, check if
277
285
// (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
278
286
// while making sure none of the computations unsigned wrap themselves.
279
- const SCEV *EndBytes =
280
- addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE, CtxI);
287
+ const SCEV *EndBytes = addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE);
281
288
if (!EndBytes)
282
289
return false ;
290
+
291
+ if (!LoopGuards)
292
+ LoopGuards.emplace (
293
+ ScalarEvolution::LoopGuards::collect (AR->getLoop (), SE));
294
+
295
+ EndBytes = SE.applyLoopGuards (EndBytes, *LoopGuards);
283
296
return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
284
297
}
285
298
@@ -296,7 +309,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
296
309
const SCEV *MaxBTC, ScalarEvolution *SE,
297
310
DenseMap<std::pair<const SCEV *, Type *>,
298
311
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
299
- DominatorTree *DT, AssumptionCache *AC) {
312
+ DominatorTree *DT, AssumptionCache *AC,
313
+ std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
300
314
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
301
315
if (PointerBounds) {
302
316
auto [Iter, Ins] = PointerBounds->insert (
@@ -332,7 +346,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
332
346
// separately checks that accesses cannot wrap, so unsigned max
333
347
// represents an upper bound.
334
348
if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE, DL,
335
- DT, AC)) {
349
+ DT, AC, LoopGuards )) {
336
350
ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
337
351
} else {
338
352
ScEnd = SE->getAddExpr (
@@ -381,7 +395,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
381
395
const SCEV *BTC = PSE.getBackedgeTakenCount ();
382
396
const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
383
397
Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE (),
384
- &DC.getPointerBounds (), DC.getDT (), DC.getAC ());
398
+ &DC.getPointerBounds (), DC.getDT (), DC.getAC (), LoopGuards );
385
399
assert (!isa<SCEVCouldNotCompute>(ScStart) &&
386
400
!isa<SCEVCouldNotCompute>(ScEnd) &&
387
401
" must be able to compute both start and end expressions" );
@@ -1987,13 +2001,13 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
1987
2001
ScalarEvolution &SE = *PSE.getSE ();
1988
2002
const auto &[SrcStart_, SrcEnd_] =
1989
2003
getStartAndEndForAccess (InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
1990
- &SE, &PointerBounds, DT, AC);
2004
+ &SE, &PointerBounds, DT, AC, LoopGuards );
1991
2005
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
1992
2006
return false ;
1993
2007
1994
2008
const auto &[SinkStart_, SinkEnd_] =
1995
2009
getStartAndEndForAccess (InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
1996
- &SE, &PointerBounds, DT, AC);
2010
+ &SE, &PointerBounds, DT, AC, LoopGuards );
1997
2011
if (isa<SCEVCouldNotCompute>(SinkStart_) ||
1998
2012
isa<SCEVCouldNotCompute>(SinkEnd_))
1999
2013
return false ;
@@ -3040,8 +3054,9 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
3040
3054
TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
3041
3055
3042
3056
DepChecker = std::make_unique<MemoryDepChecker>(
3043
- *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
3044
- PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3057
+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3058
+ PtrRtChecking =
3059
+ std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
3045
3060
if (canAnalyzeLoop ())
3046
3061
CanVecMem = analyzeLoop (AA, LI, TLI, DT);
3047
3062
}
0 commit comments