13 | 13 | #include "llvm/Analysis/Loads.h" |
14 | 14 | #include "llvm/Analysis/AliasAnalysis.h" |
15 | 15 | #include "llvm/Analysis/AssumeBundleQueries.h" |
| 16 | +#include "llvm/Analysis/LoopAccessAnalysis.h" |
16 | 17 | #include "llvm/Analysis/LoopInfo.h" |
17 | 18 | #include "llvm/Analysis/MemoryBuiltins.h" |
18 | 19 | #include "llvm/Analysis/MemoryLocation.h" |
@@ -277,84 +278,90 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { |
277 | 278 | bool llvm::isDereferenceableAndAlignedInLoop( |
278 | 279 | LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, |
279 | 280 | AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) { |
| 281 | + const Align Alignment = LI->getAlign(); |
280 | 282 | auto &DL = LI->getDataLayout(); |
281 | 283 | Value *Ptr = LI->getPointerOperand(); |
282 | | - |
283 | 284 | APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), |
284 | 285 | DL.getTypeStoreSize(LI->getType()).getFixedValue()); |
285 | | - const Align Alignment = LI->getAlign(); |
286 | | - |
287 | | - Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt(); |
288 | 286 |
289 | 287 | // If given a uniform (i.e. non-varying) address, see if we can prove the |
290 | 288 | // access is safe within the loop w/o needing predication. |
291 | 289 | if (L->isLoopInvariant(Ptr)) |
292 | | - return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, |
293 | | - HeaderFirstNonPHI, AC, &DT); |
| 290 | + return isDereferenceableAndAlignedPointer( |
| 291 | + Ptr, Alignment, EltSize, DL, &*L->getHeader()->getFirstNonPHIIt(), AC, |
| 292 | + &DT); |
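For intuition, here is a hedged sketch of the uniform-address case this fast path handles (the function is illustrative, not from the patch):

```cpp
// Illustrative only: 'p' is loop-invariant, so a single
// dereferenceability-and-alignment query issued at the loop header
// covers every iteration; no predication is required.
int sum_uniform(const int *p, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += *p; // same address every trip: L->isLoopInvariant(Ptr) holds
  return s;
}
```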
| 293 | + |
| 294 | + const SCEV *PtrScev = SE.getSCEV(Ptr); |
| 295 | + auto *AddRec = dyn_cast<SCEVAddRecExpr>(PtrScev); |
294 | 296 |
295 | | - // Otherwise, check to see if we have a repeating access pattern where we can |
296 | | - // prove that all accesses are well aligned and dereferenceable. |
297 | | - auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr)); |
| 297 | + // Otherwise, check whether we have a repeating access pattern for which |
| 298 | + // it is possible to prove that all accesses are well aligned. |
298 | 299 | if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine()) |
299 | 300 | return false; |
300 | | - auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE)); |
| 301 | + |
| 302 | + auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE)); |
301 | 303 | if (!Step) |
302 | 304 | return false; |
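As a hedged illustration of the shape these AddRec and constant-step checks accept (example code, not from the patch):

```cpp
// Illustrative only: the load of p[i] below has the affine pointer SCEV
// {%p,+,8}<%loop> -- a loop-invariant start and a constant step of
// 8 bytes per iteration, which is exactly what the checks above accept.
float sum_every_other(const float *p, int n) {
  float s = 0.0f;
  for (int i = 0; i < n; i += 2)
    s += p[i]; // EltSize = 4 bytes, Step = 8 bytes
  return s;
}
```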
303 | 305 |
304 | | - auto TC = SE.getSmallConstantMaxTripCount(L, Predicates); |
305 | | - if (!TC) |
| 306 | + // For the moment, restrict ourselves to the case where the access size is a |
| 307 | + // multiple of the requested alignment and the base is aligned. |
| 308 | + // TODO: generalize if a case is found that warrants it. |
| 309 | + if (EltSize.urem(Alignment.value()) != 0) |
306 | 310 | return false; |
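For instance, a 4-byte load requesting alignment 4 passes this check (4 % 4 == 0), while the same 4-byte load requesting alignment 8 is rejected here (4 % 8 == 4).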
307 | 311 |
308 | 312 | // TODO: Handle overlapping accesses. |
309 | | - // We should be computing AccessSize as (TC - 1) * Step + EltSize. |
310 | | - if (EltSize.sgt(Step->getAPInt())) |
| 313 | + if (EltSize.ugt(Step->getAPInt().abs())) |
| 314 | + return false; |
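In the strided sketch above, EltSize (4 bytes) does not exceed |Step| (8 bytes), so successive accesses leave a gap rather than overlapping and the check passes; a 4-byte load advancing only 2 bytes per iteration would overlap itself and be rejected here.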
| 315 | + |
| 316 | + const SCEV *MaxBECount = |
| 317 | + Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates) |
| 318 | + : SE.getConstantMaxBackedgeTakenCount(L); |
| 319 | + if (isa<SCEVCouldNotCompute>(MaxBECount)) |
| 320 | + return false; |
| 321 | + |
| 322 | + const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess( |
| 323 | + L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr); |
| 324 | + if (isa<SCEVCouldNotCompute>(AccessStart) || |
| 325 | + isa<SCEVCouldNotCompute>(AccessEnd)) |
311 | 326 | return false; |
312 | 327 |
313 | | - // Compute the total access size for access patterns with unit stride and |
314 | | - // patterns with gaps. For patterns with unit stride, Step and EltSize are the |
315 | | - // same. |
316 | | - // For patterns with gaps (i.e. non unit stride), we are |
317 | | - // accessing EltSize bytes at every Step. |
318 | | - APInt AccessSize = TC * Step->getAPInt(); |
| 328 | + // Conservatively bound the total access size. |
| 329 | + const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart); |
| 330 | + APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff); |
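Continuing the strided sketch: with a constant maximum backedge-taken count of 99 (at most 100 iterations), the accessed byte range is roughly [%p, %p + 99*8 + 4), so MaxPtrDiff comes out to 796 bytes, assuming getStartAndEndForAccess extends the range end by the element size as its LoopAccessAnalysis implementation does.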
319 | 331 |
320 | | - assert(SE.isLoopInvariant(AddRec->getStart(), L) && |
321 | | - "implied by addrec definition"); |
322 | 332 | Value *Base = nullptr; |
323 | | - if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) { |
324 | | - Base = StartS->getValue(); |
325 | | - } else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) { |
326 | | - // Handle (NewBase + offset) as start value. |
327 | | - const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0)); |
328 | | - const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1)); |
329 | | - if (StartS->getNumOperands() == 2 && Offset && NewBase) { |
330 | | - // The following code below assumes the offset is unsigned, but GEP |
331 | | - // offsets are treated as signed so we can end up with a signed value |
332 | | - // here too. For example, suppose the initial PHI value is (i8 255), |
333 | | - // the offset will be treated as (i8 -1) and sign-extended to (i64 -1). |
334 | | - if (Offset->getAPInt().isNegative()) |
335 | | - return false; |
| 333 | + APInt AccessSize; |
| 334 | + if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) { |
| 335 | + Base = NewBase->getValue(); |
| 336 | + AccessSize = MaxPtrDiff; |
| 337 | + } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) { |
| 338 | + if (MinAdd->getNumOperands() != 2) |
| 339 | + return false; |
336 | 340 |
337 | | - // For the moment, restrict ourselves to the case where the offset is a |
338 | | - // multiple of the requested alignment and the base is aligned. |
339 | | - // TODO: generalize if a case found which warrants |
340 | | - if (Offset->getAPInt().urem(Alignment.value()) != 0) |
341 | | - return false; |
342 | | - Base = NewBase->getValue(); |
343 | | - bool Overflow = false; |
344 | | - AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow); |
345 | | - if (Overflow) |
346 | | - return false; |
347 | | - } |
348 | | - } |
| 341 | + const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0)); |
| 342 | + const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1)); |
| 343 | + if (!Offset || !NewBase) |
| 344 | + return false; |
349 | 345 |
350 | | - if (!Base) |
351 | | - return false; |
| 346 | + // The code below assumes the offset is unsigned, but GEP offsets are |
| 347 | + // treated as signed, so we can end up with a signed value here too. For |
| 348 | + // example, suppose the initial PHI value is (i8 255); the offset will be |
| 349 | + // treated as (i8 -1) and sign-extended to (i64 -1). |
| 350 | + if (Offset->getAPInt().isNegative()) |
| 351 | + return false; |
352 | 352 |
353 | | - // For the moment, restrict ourselves to the case where the access size is a |
354 | | - // multiple of the requested alignment and the base is aligned. |
355 | | - // TODO: generalize if a case found which warrants |
356 | | - if (EltSize.urem(Alignment.value()) != 0) |
| 353 | + // For the moment, restrict ourselves to the case where the offset is a |
| 354 | + // multiple of the requested alignment and the base is aligned. |
| 355 | + // TODO: generalize if a case is found that warrants it. |
| 356 | + if (Offset->getAPInt().urem(Alignment.value()) != 0) |
| 357 | + return false; |
| 358 | + |
| 359 | + AccessSize = MaxPtrDiff + Offset->getAPInt(); |
| 360 | + Base = NewBase->getValue(); |
| 361 | + } else |
357 | 362 | return false; |
| 363 | + |
| 364 | + Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt(); |
358 | 365 | return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, |
359 | 366 | HeaderFirstNonPHI, AC, &DT); |
360 | 367 | } |
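For orientation, a hedged caller-side sketch of how the rewritten helper might be queried (LI, L, SE, DT and AC are assumed to come from the usual pass analyses; this is not code from the patch):

```cpp
// Illustrative only: ask whether the load can be executed unconditionally
// on every iteration of the loop.
SmallVector<const SCEVPredicate *, 4> Predicates;
if (isDereferenceableAndAlignedInLoop(LI, L, SE, DT, &AC, &Predicates)) {
  // The load is safe to speculate across the whole loop, provided the
  // SCEV predicates collected above are checked at runtime.
}
```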