@@ -193,30 +193,28 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
 /// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
 /// return nullptr. \p A and \p B must have the same type.
 static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
-                                     ScalarEvolution &SE,
-                                     const Instruction *CtxI) {
-  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B, CtxI))
+                                     ScalarEvolution &SE) {
+  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
     return nullptr;
   return SE.getAddExpr(A, B);
 }
 
 /// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
 /// return nullptr. \p A and \p B must have the same type.
 static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
-                                   ScalarEvolution &SE,
-                                   const Instruction *CtxI) {
-  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B, CtxI))
+                                   ScalarEvolution &SE) {
+  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
     return nullptr;
   return SE.getMulExpr(A, B);
 }
 
 /// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
 /// \p MaxBTC is guaranteed inbounds of the accessed object.
-static bool
-evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
-                                     const SCEV *MaxBTC, const SCEV *EltSize,
-                                     ScalarEvolution &SE, const DataLayout &DL,
-                                     DominatorTree *DT, AssumptionCache *AC) {
+static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
+    const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
+    ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
+    AssumptionCache *AC,
+    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
   auto *PointerBase = SE.getPointerBase(AR->getStart());
   auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
   if (!StartPtr)
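Note: the two helpers above share a guarded-arithmetic pattern: prove the operation cannot unsigned-wrap, and only then materialize the SCEV. With the context instruction dropped, `willNotOverflow` must now prove the fact unconditionally rather than at a specific program point. Below is a minimal sketch of a caller using the same pattern; it is meant to compile against LLVM headers, but `guardedAddMul` is an illustrative name that is not part of this commit, and constructing a live `ScalarEvolution` is omitted.

```cpp
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Illustrative sketch: compute A + (B * C) as a SCEV, refusing to produce a
// result unless each intermediate step is provably free of unsigned wrap,
// mirroring addSCEVNoOverflow/mulSCEVOverflow in the patch.
static const SCEV *guardedAddMul(const SCEV *A, const SCEV *B, const SCEV *C,
                                 ScalarEvolution &SE) {
  // Without a context instruction, the no-wrap proof must hold on all paths.
  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, B, C))
    return nullptr;
  const SCEV *Prod = SE.getMulExpr(B, C);
  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, Prod))
    return nullptr;
  return SE.getAddExpr(A, Prod);
}
```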
@@ -234,12 +232,11 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
   Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
   const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
 
-  // Context which dominates the entire loop.
-  auto *CtxI = L->getLoopPredecessor()->getTerminator();
   // Check if we have a suitable dereferencable assumption we can use.
   if (!StartPtrV->canBeFreed()) {
     RetainedKnowledge DerefRK = getKnowledgeValidInContext(
-        StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
+        StartPtrV, {Attribute::Dereferenceable}, *AC,
+        L->getLoopPredecessor()->getTerminator(), DT);
     if (DerefRK) {
       DerefBytesSCEV = SE.getUMaxExpr(
           DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
@@ -263,23 +260,36 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
       SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
 
   const SCEV *OffsetAtLastIter =
-      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE, CtxI);
-  if (!OffsetAtLastIter)
-    return false;
+      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
+  if (!OffsetAtLastIter) {
+    // Re-try with constant max backedge-taken count if using the symbolic one
+    // failed.
+    MaxBTC = SE.getNoopOrZeroExtend(
+        SE.getConstantMaxBackedgeTakenCount(AR->getLoop()), WiderTy);
+    OffsetAtLastIter =
+        mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
+    if (!OffsetAtLastIter)
+      return false;
+  }
 
   const SCEV *OffsetEndBytes = addSCEVNoOverflow(
-      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE, CtxI);
+      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
   if (!OffsetEndBytes)
     return false;
 
   if (IsKnownNonNegative) {
    // For positive steps, check if
    // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
    // while making sure none of the computations unsigned wrap themselves.
-    const SCEV *EndBytes =
-        addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE, CtxI);
+    const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
     if (!EndBytes)
       return false;
+
+    if (!LoopGuards)
+      LoopGuards.emplace(
+          ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
+
+    EndBytes = SE.applyLoopGuards(EndBytes, *LoopGuards);
     return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
   }
 
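Note: the new `std::optional<ScalarEvolution::LoopGuards> &` parameter turns loop-guard collection into a lazy, caller-owned cache: `LoopGuards::collect` gathers the conditions guarding the loop (presumably costly enough to motivate the caching), runs at most once, and later queries reuse the result when tightening `EndBytes` before the final predicate check. The same pattern in isolation, as a sketch (the `applyGuardsCached` name is illustrative, not from this commit):

```cpp
#include "llvm/Analysis/ScalarEvolution.h"
#include <optional>
using namespace llvm;

// Illustrative sketch: refine Expr with the loop's guarding conditions,
// collecting them on first use and caching them in the caller-owned optional
// so repeated queries against the same loop skip the collection walk.
static const SCEV *
applyGuardsCached(const SCEV *Expr, const Loop *L, ScalarEvolution &SE,
                  std::optional<ScalarEvolution::LoopGuards> &Cache) {
  if (!Cache)
    Cache.emplace(ScalarEvolution::LoopGuards::collect(L, SE));
  return SE.applyLoopGuards(Expr, *Cache);
}
```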
@@ -296,7 +306,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
     const SCEV *MaxBTC, ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, Type *>,
              std::pair<const SCEV *, const SCEV *>> *PointerBounds,
-    DominatorTree *DT, AssumptionCache *AC) {
+    DominatorTree *DT, AssumptionCache *AC,
+    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
   std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
   if (PointerBounds) {
     auto [Iter, Ins] = PointerBounds->insert(
@@ -332,7 +343,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
     // separately checks that accesses cannot not wrap, so unsigned max
     // represents an upper bound.
     if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
-                                             DT, AC)) {
+                                             DT, AC, LoopGuards)) {
       ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
     } else {
       ScEnd = SE->getAddExpr(
@@ -381,7 +392,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
   const SCEV *BTC = PSE.getBackedgeTakenCount();
   const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
       Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
-      &DC.getPointerBounds(), DC.getDT(), DC.getAC());
+      &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1987,13 +1998,13 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
   ScalarEvolution &SE = *PSE.getSE();
   const auto &[SrcStart_, SrcEnd_] =
       getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
-                              &SE, &PointerBounds, DT, AC);
+                              &SE, &PointerBounds, DT, AC, LoopGuards);
   if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
     return false;
 
   const auto &[SinkStart_, SinkEnd_] =
       getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
-                              &SE, &PointerBounds, DT, AC);
+                              &SE, &PointerBounds, DT, AC, LoopGuards);
   if (isa<SCEVCouldNotCompute>(SinkStart_) ||
       isa<SCEVCouldNotCompute>(SinkEnd_))
     return false;
@@ -3040,8 +3051,9 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
         TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
 
   DepChecker = std::make_unique<MemoryDepChecker>(
-      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
-  PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
+      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
+  PtrRtChecking =
+      std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
   if (canAnalyzeLoop())
     CanVecMem = analyzeLoop(AA, LI, TLI, DT);
 }
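Note: the constructor wiring above shows the ownership model behind all of the parameter threading: the optional cache presumably lives in `LoopAccessInfo` and is lent by reference to both `MemoryDepChecker` and `RuntimePointerChecking`, so whichever query needs guards first pays the collection cost and every later consumer reuses the result. A reduced, self-contained sketch of that shape, with hypothetical stand-in types:

```cpp
#include <memory>
#include <optional>

struct GuardCache {}; // stands in for ScalarEvolution::LoopGuards

// Borrower: holds a reference to the owner's cache, never a copy.
struct DepCheckerSketch {
  explicit DepCheckerSketch(std::optional<GuardCache> &G) : Guards(G) {}
  std::optional<GuardCache> &Guards;
};

// Owner: one lazily-filled cache shared with every helper object, mirroring
// how LoopAccessInfo hands LoopGuards to its checkers in the patch.
struct AnalysisSketch {
  std::optional<GuardCache> Guards; // declared first, so initialized first
  std::unique_ptr<DepCheckerSketch> DepChecker =
      std::make_unique<DepCheckerSketch>(Guards);
};

int main() {
  AnalysisSketch LAI;
  if (!LAI.DepChecker->Guards) // first use fills the shared cache
    LAI.DepChecker->Guards.emplace();
  return 0;
}
```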