@@ -188,9 +188,90 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
188188 Members.push_back (Index);
189189}
190190
191+ // / Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
192+ // / return nullptr. \p A and \p B must have the same type.
193+ static const SCEV *addSCEVOverflow (const SCEV *A, const SCEV *B,
194+ ScalarEvolution &SE) {
195+ if (!SE.willNotOverflow (Instruction::Add, false , A, B))
196+ return nullptr ;
197+ return SE.getAddExpr (A, B);
198+ }
199+
200+ // / Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
201+ // / return nullptr. \p A and \p B must have the same type.
202+ static const SCEV *mulSCEVOverflow (const SCEV *A, const SCEV *B,
203+ ScalarEvolution &SE) {
204+ if (!SE.willNotOverflow (Instruction::Mul, false , A, B))
205+ return nullptr ;
206+ return SE.getMulExpr (A, B);
207+ }
208+
209+ // / Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
210+ // / \p MaxBTC is guaranteed inbounds of the accessed object.
211+ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap (const SCEVAddRecExpr *AR,
212+ const SCEV *MaxBTC,
213+ const SCEV *EltSize,
214+ ScalarEvolution &SE,
215+ const DataLayout &DL) {
216+ auto *PointerBase = SE.getPointerBase (AR->getStart ());
217+ auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
218+ if (!StartPtr)
219+ return false ;
220+ bool CheckForNonNull, CheckForFreed;
221+ uint64_t DerefBytes = StartPtr->getValue ()->getPointerDereferenceableBytes (
222+ DL, CheckForNonNull, CheckForFreed);
223+
224+ if (CheckForNonNull || CheckForFreed)
225+ return false ;
226+
227+ const SCEV *Step = AR->getStepRecurrence (SE);
228+ bool IsKnownNonNegative = SE.isKnownNonNegative (Step);
229+ if (!IsKnownNonNegative && !SE.isKnownNegative (Step))
230+ return false ;
231+
232+ Type *WiderTy = SE.getWiderType (MaxBTC->getType (), Step->getType ());
233+ Step = SE.getNoopOrSignExtend (Step, WiderTy);
234+ MaxBTC = SE.getNoopOrZeroExtend (MaxBTC, WiderTy);
235+
236+ // For the computations below, make sure they don't unsigned wrap.
237+ if (!SE.isKnownPredicate (CmpInst::ICMP_UGE, AR->getStart (), StartPtr))
238+ return false ;
239+ const SCEV *StartOffset = SE.getNoopOrZeroExtend (
240+ SE.getMinusSCEV (AR->getStart (), StartPtr), WiderTy);
241+
242+ const SCEV *OffsetAtLastIter =
243+ mulSCEVOverflow (MaxBTC, SE.getAbsExpr (Step, false ), SE);
244+ if (!OffsetAtLastIter)
245+ return false ;
246+
247+ const SCEV *OffsetEndBytes = addSCEVOverflow (
248+ OffsetAtLastIter, SE.getNoopOrZeroExtend (EltSize, WiderTy), SE);
249+ if (!OffsetEndBytes)
250+ return false ;
251+
252+ if (IsKnownNonNegative) {
253+ // For positive steps, check if
254+ // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
255+ // while making sure none of the computations unsigned wrap themselves.
256+ const SCEV *EndBytes = addSCEVOverflow (StartOffset, OffsetEndBytes, SE);
257+ if (!EndBytes)
258+ return false ;
259+ return SE.isKnownPredicate (CmpInst::ICMP_ULE, EndBytes,
260+ SE.getConstant (WiderTy, DerefBytes));
261+ }
262+
263+ // For negative steps check if
264+ // * StartOffset >= (MaxBTC * Step + EltSize)
265+ // * StartOffset <= DerefBytes.
266+ assert (SE.isKnownNegative (Step) && " must be known negative" );
267+ return SE.isKnownPredicate (CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
268+ SE.isKnownPredicate (CmpInst::ICMP_ULE, StartOffset,
269+ SE.getConstant (WiderTy, DerefBytes));
270+ }
271+
191272std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess (
192- const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount ,
193- ScalarEvolution *SE,
273+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC ,
274+ const SCEV *MaxBTC, ScalarEvolution *SE,
194275 DenseMap<std::pair<const SCEV *, Type *>,
195276 std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
196277 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -206,11 +287,37 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
206287 const SCEV *ScStart;
207288 const SCEV *ScEnd;
208289
290+ auto &DL = Lp->getHeader ()->getDataLayout ();
291+ Type *IdxTy = DL.getIndexType (PtrExpr->getType ());
292+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr (IdxTy, AccessTy);
209293 if (SE->isLoopInvariant (PtrExpr, Lp)) {
210294 ScStart = ScEnd = PtrExpr;
211295 } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
212296 ScStart = AR->getStart ();
213- ScEnd = AR->evaluateAtIteration (MaxBECount, *SE);
297+ if (!isa<SCEVCouldNotCompute>(BTC))
298+ // Evaluating AR at an exact BTC is safe: LAA separately checks that
299+ // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
300+ // the loop either triggers UB when executing a memory access with a
301+ // poison pointer or the wrapping/poisoned pointer is not used.
302+ ScEnd = AR->evaluateAtIteration (BTC, *SE);
303+ else {
304+ // Evaluating AR at MaxBTC may wrap and create an expression that is less
305+ // than the start of the AddRec due to wrapping (for example consider
306+ // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
307+ // will get incremented by EltSize before returning, so this effectively
308+ // sets ScEnd to the maximum unsigned value for the type. Note that LAA
309+ // separately checks that accesses cannot wrap, so unsigned max
310+ // represents an upper bound.
311+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE,
312+ DL)) {
313+ ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
314+ } else {
315+ ScEnd = SE->getAddExpr (
316+ SE->getNegativeSCEV (EltSizeSCEV),
317+ SE->getSCEV (ConstantExpr::getIntToPtr (
318+ ConstantInt::get (EltSizeSCEV->getType (), -1 ), AR->getType ())));
319+ }
320+ }
214321 const SCEV *Step = AR->getStepRecurrence (*SE);
215322
216323 // For expressions with negative step, the upper bound is ScStart and the
@@ -232,9 +339,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
232339 assert (SE->isLoopInvariant (ScEnd, Lp) && " ScEnd needs to be invariant" );
233340
234341 // Add the size of the pointed element to ScEnd.
235- auto &DL = Lp->getHeader ()->getDataLayout ();
236- Type *IdxTy = DL.getIndexType (PtrExpr->getType ());
237- const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr (IdxTy, AccessTy);
238342 ScEnd = SE->getAddExpr (ScEnd, EltSizeSCEV);
239343
240344 std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
@@ -250,9 +354,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
250354 unsigned DepSetId, unsigned ASId,
251355 PredicatedScalarEvolution &PSE,
252356 bool NeedsFreeze) {
253- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount ();
254- const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
255- Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE (), &DC.getPointerBounds ());
357+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
358+ const SCEV *BTC = PSE.getBackedgeTakenCount ();
359+ const auto &[ScStart, ScEnd] =
360+ getStartAndEndForAccess (Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361+ PSE.getSE (), &DC.getPointerBounds ());
256362 assert (!isa<SCEVCouldNotCompute>(ScStart) &&
257363 !isa<SCEVCouldNotCompute>(ScEnd) &&
258364 " must be able to compute both start and end expressions" );
@@ -1907,11 +2013,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19072013 // required for correctness.
19082014 if (SE.isLoopInvariant (Src, InnermostLoop) ||
19092015 SE.isLoopInvariant (Sink, InnermostLoop)) {
1910- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount ();
1911- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess (
1912- InnermostLoop, Src, ATy, MaxBECount, PSE.getSE (), &PointerBounds);
1913- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess (
1914- InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE (), &PointerBounds);
2016+ const SCEV *BTC = PSE.getBackedgeTakenCount ();
2017+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
2018+ const auto &[SrcStart_, SrcEnd_] =
2019+ getStartAndEndForAccess (InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
2020+ PSE.getSE (), &PointerBounds);
2021+ const auto &[SinkStart_, SinkEnd_] =
2022+ getStartAndEndForAccess (InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
2023+ PSE.getSE (), &PointerBounds);
19152024 if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
19162025 !isa<SCEVCouldNotCompute>(SrcEnd_) &&
19172026 !isa<SCEVCouldNotCompute>(SinkStart_) &&
0 commit comments