@@ -193,28 +193,30 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
 /// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
 /// return nullptr. \p A and \p B must have the same type.
 static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
-                                     ScalarEvolution &SE) {
-  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
+                                     ScalarEvolution &SE,
+                                     const Instruction *CtxI) {
+  if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B, CtxI))
     return nullptr;
   return SE.getAddExpr(A, B);
 }
 
 /// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
 /// return nullptr. \p A and \p B must have the same type.
 static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
-                                   ScalarEvolution &SE) {
-  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
+                                   ScalarEvolution &SE,
+                                   const Instruction *CtxI) {
+  if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B, CtxI))
     return nullptr;
   return SE.getMulExpr(A, B);
 }
 
 /// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
 /// \p MaxBTC is guaranteed inbounds of the accessed object.
-static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
-    const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
-    ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
-    AssumptionCache *AC,
-    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
+static bool
+evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+                                     const SCEV *MaxBTC, const SCEV *EltSize,
+                                     ScalarEvolution &SE, const DataLayout &DL,
+                                     DominatorTree *DT, AssumptionCache *AC) {
   auto *PointerBase = SE.getPointerBase(AR->getStart());
   auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
   if (!StartPtr)
@@ -232,11 +234,12 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
   Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
   const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
 
+  // Context which dominates the entire loop.
+  auto *CtxI = L->getLoopPredecessor()->getTerminator();
   // Check if we have a suitable dereferenceable assumption we can use.
   if (!StartPtrV->canBeFreed()) {
     RetainedKnowledge DerefRK = getKnowledgeValidInContext(
-        StartPtrV, {Attribute::Dereferenceable}, *AC,
-        L->getLoopPredecessor()->getTerminator(), DT);
+        StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
     if (DerefRK) {
       DerefBytesSCEV = SE.getUMaxExpr(
           DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
@@ -260,36 +263,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
       SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
 
   const SCEV *OffsetAtLastIter =
-      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
-  if (!OffsetAtLastIter) {
-    // Re-try with constant max backedge-taken count if using the symbolic one
-    // failed.
-    MaxBTC = SE.getNoopOrZeroExtend(
-        SE.getConstantMaxBackedgeTakenCount(AR->getLoop()), WiderTy);
-    OffsetAtLastIter =
-        mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
-    if (!OffsetAtLastIter)
-      return false;
-  }
+      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE, CtxI);
+  if (!OffsetAtLastIter)
+    return false;
 
   const SCEV *OffsetEndBytes = addSCEVNoOverflow(
-      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
+      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE, CtxI);
   if (!OffsetEndBytes)
     return false;
 
   if (IsKnownNonNegative) {
     // For positive steps, check if
     //  (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
     // while making sure none of the computations unsigned wrap themselves.
-    const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
+    const SCEV *EndBytes =
+        addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE, CtxI);
     if (!EndBytes)
       return false;
-
-    if (!LoopGuards)
-      LoopGuards.emplace(
-          ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
-
-    EndBytes = SE.applyLoopGuards(EndBytes, *LoopGuards);
     return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
   }
 
@@ -306,8 +296,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
     const SCEV *MaxBTC, ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, Type *>,
              std::pair<const SCEV *, const SCEV *>> *PointerBounds,
-    DominatorTree *DT, AssumptionCache *AC,
-    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
+    DominatorTree *DT, AssumptionCache *AC) {
   std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
   if (PointerBounds) {
     auto [Iter, Ins] = PointerBounds->insert(
@@ -343,7 +332,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
     // separately checks that accesses cannot wrap, so unsigned max
     // represents an upper bound.
     if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
-                                             DT, AC, LoopGuards)) {
+                                             DT, AC)) {
       ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
     } else {
       ScEnd = SE->getAddExpr(
@@ -392,7 +381,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
   const SCEV *BTC = PSE.getBackedgeTakenCount();
   const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
       Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
-      &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
+      &DC.getPointerBounds(), DC.getDT(), DC.getAC());
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1998,13 +1987,13 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
   ScalarEvolution &SE = *PSE.getSE();
   const auto &[SrcStart_, SrcEnd_] =
       getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
-                              &SE, &PointerBounds, DT, AC, LoopGuards);
+                              &SE, &PointerBounds, DT, AC);
   if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
     return false;
 
   const auto &[SinkStart_, SinkEnd_] =
       getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
-                              &SE, &PointerBounds, DT, AC, LoopGuards);
+                              &SE, &PointerBounds, DT, AC);
   if (isa<SCEVCouldNotCompute>(SinkStart_) ||
       isa<SCEVCouldNotCompute>(SinkEnd_))
     return false;
@@ -3051,9 +3040,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
       TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
 
   DepChecker = std::make_unique<MemoryDepChecker>(
-      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
-  PtrRtChecking =
-      std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
+      *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
+  PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
   if (canAnalyzeLoop())
     CanVecMem = analyzeLoop(AA, LI, TLI, DT);
 }