@@ -193,30 +193,28 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
193193/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194194/// return nullptr. \p A and \p B must have the same type.
195195static const SCEV *addSCEVNoOverflow (const SCEV *A, const SCEV *B,
196- ScalarEvolution &SE,
197- const Instruction *CtxI) {
198- if (!SE.willNotOverflow (Instruction::Add, /* IsSigned=*/ false , A, B, CtxI))
196+ ScalarEvolution &SE) {
197+ if (!SE.willNotOverflow (Instruction::Add, /* IsSigned=*/ false , A, B))
199198 return nullptr ;
200199 return SE.getAddExpr (A, B);
201200}
202201
203202/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
204203/// return nullptr. \p A and \p B must have the same type.
205204static const SCEV *mulSCEVOverflow (const SCEV *A, const SCEV *B,
206- ScalarEvolution &SE,
207- const Instruction *CtxI) {
208- if (!SE.willNotOverflow (Instruction::Mul, /* IsSigned=*/ false , A, B, CtxI))
205+ ScalarEvolution &SE) {
206+ if (!SE.willNotOverflow (Instruction::Mul, /* IsSigned=*/ false , A, B))
209207 return nullptr ;
210208 return SE.getMulExpr (A, B);
211209}
212210
213211/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
214212/// \p MaxBTC is guaranteed inbounds of the accessed object.
215- static bool
216- evaluatePtrAddRecAtMaxBTCWillNotWrap ( const SCEVAddRecExpr *AR,
217- const SCEV *MaxBTC, const SCEV *EltSize ,
218- ScalarEvolution &SE, const DataLayout &DL ,
219- DominatorTree *DT, AssumptionCache *AC ) {
213+ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap (
214+ const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize ,
215+ ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT ,
216+ AssumptionCache *AC ,
217+ std::optional<ScalarEvolution::LoopGuards> &LoopGuards ) {
220218 auto *PointerBase = SE.getPointerBase (AR->getStart ());
221219 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
222220 if (!StartPtr)
@@ -234,12 +232,11 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
234232 Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
235233 const SCEV *DerefBytesSCEV = SE.getConstant (WiderTy, DerefBytes);
236234
237- // Context which dominates the entire loop.
238- auto *CtxI = L->getLoopPredecessor ()->getTerminator ();
239235 // Check if we have a suitable dereferenceable assumption we can use.
240236 if (!StartPtrV->canBeFreed ()) {
241237 RetainedKnowledge DerefRK = getKnowledgeValidInContext (
242- StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
238+ StartPtrV, {Attribute::Dereferenceable}, *AC,
239+ L->getLoopPredecessor ()->getTerminator (), DT);
243240 if (DerefRK) {
244241 DerefBytesSCEV = SE.getUMaxExpr (
245242 DerefBytesSCEV, SE.getConstant (WiderTy, DerefRK.ArgValue ));
@@ -263,23 +260,39 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
263260 SE.getMinusSCEV (AR->getStart (), StartPtr), WiderTy);
264261
265262 const SCEV *OffsetAtLastIter =
266- mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE, CtxI);
267- if (!OffsetAtLastIter)
268- return false ;
263+ mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE);
264+ if (!OffsetAtLastIter) {
265+ // Re-try with constant max backedge-taken count if using the symbolic one
266+ // failed.
267+ MaxBTC = SE.getConstantMaxBackedgeTakenCount (AR->getLoop ());
268+ if (isa<SCEVCouldNotCompute>(MaxBTC))
269+ return false ;
270+ MaxBTC = SE.getNoopOrZeroExtend (
271+ MaxBTC, WiderTy);
272+ OffsetAtLastIter =
273+ mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, /* IsNSW=*/ false ), SE);
274+ if (!OffsetAtLastIter)
275+ return false ;
276+ }
269277
270278 const SCEV *OffsetEndBytes = addSCEVNoOverflow (
271- OffsetAtLastIter, SE.getNoopOrZeroExtend (EltSize, WiderTy), SE, CtxI );
279+ OffsetAtLastIter, SE.getNoopOrZeroExtend (EltSize, WiderTy), SE);
272280 if (!OffsetEndBytes)
273281 return false ;
274282
275283 if (IsKnownNonNegative) {
276284 // For positive steps, check if
277285 // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
278286 // while making sure none of the computations unsigned wrap themselves.
279- const SCEV *EndBytes =
280- addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE, CtxI);
287+ const SCEV *EndBytes = addSCEVNoOverflow (StartOffset, OffsetEndBytes, SE);
281288 if (!EndBytes)
282289 return false ;
290+
291+ if (!LoopGuards)
292+ LoopGuards.emplace (
293+ ScalarEvolution::LoopGuards::collect (AR->getLoop (), SE));
294+
295+ EndBytes = SE.applyLoopGuards (EndBytes, *LoopGuards);
283296 return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
284297 }
285298
@@ -296,7 +309,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
296309 const SCEV *MaxBTC, ScalarEvolution *SE,
297310 DenseMap<std::pair<const SCEV *, Type *>,
298311 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
299- DominatorTree *DT, AssumptionCache *AC) {
312+ DominatorTree *DT, AssumptionCache *AC,
313+ std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
300314 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
301315 if (PointerBounds) {
302316 auto [Iter, Ins] = PointerBounds->insert (
@@ -332,7 +346,7 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
332346 // separately checks that accesses cannot wrap, so unsigned max
333347 // represents an upper bound.
334348 if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE, DL,
335- DT, AC)) {
349+ DT, AC, LoopGuards )) {
336350 ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
337351 } else {
338352 ScEnd = SE->getAddExpr (
@@ -381,7 +395,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
381395 const SCEV *BTC = PSE.getBackedgeTakenCount ();
382396 const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
383397 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE (),
384- &DC.getPointerBounds (), DC.getDT (), DC.getAC ());
398+ &DC.getPointerBounds (), DC.getDT (), DC.getAC (), LoopGuards );
385399 assert (!isa<SCEVCouldNotCompute>(ScStart) &&
386400 !isa<SCEVCouldNotCompute>(ScEnd) &&
387401 " must be able to compute both start and end expressions" );
@@ -1986,13 +2000,13 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
19862000 ScalarEvolution &SE = *PSE.getSE ();
19872001 const auto &[SrcStart_, SrcEnd_] =
19882002 getStartAndEndForAccess (InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
1989- &SE, &PointerBounds, DT, AC);
2003+ &SE, &PointerBounds, DT, AC, LoopGuards );
19902004 if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
19912005 return false ;
19922006
19932007 const auto &[SinkStart_, SinkEnd_] =
19942008 getStartAndEndForAccess (InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
1995- &SE, &PointerBounds, DT, AC);
2009+ &SE, &PointerBounds, DT, AC, LoopGuards );
19962010 if (isa<SCEVCouldNotCompute>(SinkStart_) ||
19972011 isa<SCEVCouldNotCompute>(SinkEnd_))
19982012 return false ;
@@ -3039,8 +3053,9 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30393053 TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
30403054
30413055 DepChecker = std::make_unique<MemoryDepChecker>(
3042- *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
3043- PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3056+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3057+ PtrRtChecking =
3058+ std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
30443059 if (canAnalyzeLoop ())
30453060 CanVecMem = analyzeLoop (AA, LI, TLI, DT);
30463061}
0 commit comments