@@ -193,28 +193,30 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
193193/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194194/// return nullptr. \p A and \p B must have the same type.
195195static const SCEV *addSCEVNoOverflow (const SCEV *A, const SCEV *B,
196- ScalarEvolution &SE) {
197- if (!SE.willNotOverflow (Instruction::Add, /* IsSigned=*/ false , A, B))
196+ ScalarEvolution &SE,
197+ const Instruction *CtxI) {
198+ if (!SE.willNotOverflow (Instruction::Add, /* IsSigned=*/ false , A, B, CtxI))
198199 return nullptr ;
199200 return SE.getAddExpr (A, B);
200201}
201202
202203/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
203204/// return nullptr. \p A and \p B must have the same type.
204205static const SCEV *mulSCEVOverflow (const SCEV *A, const SCEV *B,
205- ScalarEvolution &SE) {
206- if (!SE.willNotOverflow (Instruction::Mul, /* IsSigned=*/ false , A, B))
206+ ScalarEvolution &SE,
207+ const Instruction *CtxI) {
208+ if (!SE.willNotOverflow (Instruction::Mul, /* IsSigned=*/ false , A, B, CtxI))
207209 return nullptr ;
208210 return SE.getMulExpr (A, B);
209211}
210212
211213/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
212214/// \p MaxBTC is guaranteed inbounds of the accessed object.
213- static bool evaluatePtrAddRecAtMaxBTCWillNotWrap (
214- const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize ,
215- ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT ,
216- AssumptionCache *AC ,
217- std::optional<ScalarEvolution::LoopGuards> &LoopGuards ) {
215+ static bool
216+ evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR,
217+ const SCEV *MaxBTC, const SCEV *EltSize ,
218+ ScalarEvolution &SE, const DataLayout &DL ,
219+ DominatorTree *DT, AssumptionCache *AC ) {
218220 auto *PointerBase = SE.getPointerBase (AR->getStart ());
219221 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
220222 if (!StartPtr)
@@ -232,11 +234,12 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
232234 Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233235 const SCEV *DerefBytesSCEV = SE.getConstant (WiderTy, DerefBytes);
234236
237+ // Context which dominates the entire loop.
238+ auto *CtxI = L->getLoopPredecessor ()->getTerminator ();
235239 // Check if we have a suitable dereferencable assumption we can use.
236240 if (!StartPtrV->canBeFreed ()) {
237241 RetainedKnowledge DerefRK = getKnowledgeValidInContext (
238- StartPtrV, {Attribute::Dereferenceable}, *AC,
239- L->getLoopPredecessor ()->getTerminator (), DT);
242+ StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
240243 if (DerefRK) {
241244 DerefBytesSCEV = SE.getUMaxExpr (
242245 DerefBytesSCEV, SE.getConstant (WiderTy, DerefRK.ArgValue ));
@@ -260,36 +263,23 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
260263 SE.getMinusSCEV (AR->getStart (), StartPtr), WiderTy);
261264
262265 const SCEV *OffsetAtLastIter =
263- mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE);
264- if (!OffsetAtLastIter) {
265- // Re-try with constant max backedge-taken count if using the symbolic one
266- // failed.
267- MaxBTC = SE.getNoopOrZeroExtend (
268- SE.getConstantMaxBackedgeTakenCount (AR->getLoop ()), WiderTy);
269- OffsetAtLastIter =
270- mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE);
271- if (!OffsetAtLastIter)
272- return false ;
273- }
266+ mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE, CtxI);
267+ if (!OffsetAtLastIter)
268+ return false ;
274269
275270 const SCEV *OffsetEndBytes = addSCEVNoOverflow (
276- OffsetAtLastIter, SE.getNoopOrZeroExtend (EltSize, WiderTy), SE);
271+ OffsetAtLastIter, SE.getNoopOrZeroExtend (EltSize, WiderTy), SE, CtxI );
277272 if (!OffsetEndBytes)
278273 return false ;
279274
280275 if (IsKnownNonNegative) {
281276 // For positive steps, check if
282277 // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
283278 // while making sure none of the computations unsigned wrap themselves.
284- const SCEV *EndBytes = addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE);
279+ const SCEV *EndBytes =
280+ addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE, CtxI);
285281 if (!EndBytes)
286282 return false ;
287-
288- if (!LoopGuards)
289- LoopGuards.emplace (
290- ScalarEvolution::LoopGuards::collect (AR->getLoop (), SE));
291-
292- EndBytes = SE.applyLoopGuards (EndBytes, *LoopGuards);
293283 return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
294284 }
295285
@@ -306,8 +296,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
306296 const SCEV *MaxBTC, ScalarEvolution *SE,
307297 DenseMap<std::pair<const SCEV *, Type *>,
308298 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
309- DominatorTree *DT, AssumptionCache *AC,
310- std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
299+ DominatorTree *DT, AssumptionCache *AC) {
311300 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
312301 if (PointerBounds) {
313302 auto [Iter, Ins] = PointerBounds->insert (
@@ -343,7 +332,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
343332 // separately checks that accesses cannot wrap, so unsigned max
344333 // represents an upper bound.
345334 if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE, DL,
346- DT, AC, LoopGuards )) {
335+ DT, AC)) {
347336 ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
348337 } else {
349338 ScEnd = SE->getAddExpr (
@@ -392,7 +381,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
392381 const SCEV *BTC = PSE.getBackedgeTakenCount ();
393382 const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
394383 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE (),
395- &DC.getPointerBounds (), DC.getDT (), DC.getAC (), LoopGuards );
384+ &DC.getPointerBounds (), DC.getDT (), DC.getAC ());
396385 assert (!isa<SCEVCouldNotCompute>(ScStart) &&
397386 !isa<SCEVCouldNotCompute>(ScEnd) &&
398387 " must be able to compute both start and end expressions" );
@@ -1998,13 +1987,13 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
19981987 ScalarEvolution &SE = *PSE.getSE ();
19991988 const auto &[SrcStart_, SrcEnd_] =
20001989 getStartAndEndForAccess (InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
2001- &SE, &PointerBounds, DT, AC, LoopGuards );
1990+ &SE, &PointerBounds, DT, AC);
20021991 if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
20031992 return false ;
20041993
20051994 const auto &[SinkStart_, SinkEnd_] =
20061995 getStartAndEndForAccess (InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
2007- &SE, &PointerBounds, DT, AC, LoopGuards );
1996+ &SE, &PointerBounds, DT, AC);
20081997 if (isa<SCEVCouldNotCompute>(SinkStart_) ||
20091998 isa<SCEVCouldNotCompute>(SinkEnd_))
20101999 return false ;
@@ -3051,9 +3040,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30513040 TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
30523041
30533042 DepChecker = std::make_unique<MemoryDepChecker>(
3054- *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3055- PtrRtChecking =
3056- std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
3043+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
3044+ PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
30573045 if (canAnalyzeLoop ())
30583046 CanVecMem = analyzeLoop (AA, LI, TLI, DT);
30593047}
0 commit comments