@@ -806,11 +806,11 @@ class AccessAnalysis {
806806 typedef SmallVector<MemAccessInfo, 8 > MemAccessInfoList;
807807
808808 AccessAnalysis (const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
809- MemoryDepChecker::DepCandidates &DA,
809+ DominatorTree &DT, MemoryDepChecker::DepCandidates &DA,
810810 PredicatedScalarEvolution &PSE,
811811 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
812- : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA ), PSE(PSE ),
813- LoopAliasScopes (LoopAliasScopes) {
812+ : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT ), DepCands(DA ),
813+ PSE (PSE), LoopAliasScopes(LoopAliasScopes) {
814814 // We're analyzing dependences across loop iterations.
815815 BAA.enableCrossIterationMode ();
816816 }
@@ -934,6 +934,9 @@ class AccessAnalysis {
934934 // / The LoopInfo of the loop being checked.
935935 const LoopInfo *LI;
936936
937+ // / The dominator tree of the function.
938+ DominatorTree &DT;
939+
937940 // / Sets of potentially dependent accesses - members of one set share an
938941 // / underlying pointer. The set "CheckDeps" identifies which sets really need a
939942 // / dependence check.
@@ -1015,6 +1018,7 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
10151018// / information from the IR pointer value to determine no-wrap.
10161019static bool isNoWrap (PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
10171020 Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
1021+ const DominatorTree &DT,
10181022 std::optional<int64_t > Stride = std::nullopt ) {
10191023 // FIXME: This should probably only return true for NUW.
10201024 if (AR->getNoWrapFlags (SCEV::NoWrapMask))
@@ -1029,8 +1033,18 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
10291033 // case, the GEP would be poison and any memory access dependent on it would
10301034 // be immediate UB when executed.
10311035 if (auto *GEP = dyn_cast_if_present<GetElementPtrInst>(Ptr);
1032- GEP && GEP->hasNoUnsignedSignedWrap ())
1033- return true ;
1036+ GEP && GEP->hasNoUnsignedSignedWrap ()) {
1037+ // For the above reasoning to apply, the pointer must be dereferenced in
1038+ // every iteration.
1039+ if (L->getHeader () == L->getLoopLatch () ||
1040+ any_of (GEP->users (), [L, &DT, GEP](User *U) {
1041+ if (getLoadStorePointerOperand (U) != GEP)
1042+ return false ;
1043+ BasicBlock *UserBB = cast<Instruction>(U)->getParent ();
1044+ return !LoopAccessInfo::blockNeedsPredication (UserBB, L, &DT);
1045+ }))
1046+ return true ;
1047+ }
10341048
10351049 if (!Stride)
10361050 Stride = getStrideFromAddRec (AR, L, AccessTy, Ptr, PSE);
@@ -1293,7 +1307,7 @@ bool AccessAnalysis::createCheckForAccess(
12931307 }
12941308
12951309 if (!isNoWrap (PSE, AR, RTCheckPtrs.size () == 1 ? Ptr : nullptr , AccessTy,
1296- TheLoop, Assume))
1310+ TheLoop, Assume, DT ))
12971311 return false ;
12981312 }
12991313
@@ -1606,7 +1620,7 @@ void AccessAnalysis::processMemAccesses() {
16061620// / Check whether the access through \p Ptr has a constant stride.
16071621std::optional<int64_t >
16081622llvm::getPtrStride (PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
1609- const Loop *Lp,
1623+ const Loop *Lp, const DominatorTree &DT,
16101624 const DenseMap<Value *, const SCEV *> &StridesMap,
16111625 bool Assume, bool ShouldCheckWrap) {
16121626 const SCEV *PtrScev = replaceSymbolicStrideSCEV (PSE, StridesMap, Ptr);
@@ -1630,7 +1644,7 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
16301644 if (!ShouldCheckWrap || !Stride)
16311645 return Stride;
16321646
1633- if (isNoWrap (PSE, AR, Ptr, AccessTy, Lp, Assume, Stride))
1647+ if (isNoWrap (PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))
16341648 return Stride;
16351649
16361650 LLVM_DEBUG (
@@ -2047,10 +2061,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
20472061 BPtr->getType ()->getPointerAddressSpace ())
20482062 return MemoryDepChecker::Dependence::Unknown;
20492063
2050- std::optional<int64_t > StrideAPtr =
2051- getPtrStride ( PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true , true );
2052- std::optional<int64_t > StrideBPtr =
2053- getPtrStride ( PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true , true );
2064+ std::optional<int64_t > StrideAPtr = getPtrStride (
2065+ PSE, ATy, APtr, InnermostLoop, *DT , SymbolicStrides, true , true );
2066+ std::optional<int64_t > StrideBPtr = getPtrStride (
2067+ PSE, BTy, BPtr, InnermostLoop, *DT , SymbolicStrides, true , true );
20542068
20552069 const SCEV *Src = PSE.getSCEV (APtr);
20562070 const SCEV *Sink = PSE.getSCEV (BPtr);
@@ -2627,7 +2641,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
26272641 }
26282642
26292643 MemoryDepChecker::DepCandidates DepCands;
2630- AccessAnalysis Accesses (TheLoop, AA, LI, DepCands, *PSE, LoopAliasScopes);
2644+ AccessAnalysis Accesses (TheLoop, AA, LI, *DT, DepCands, *PSE,
2645+ LoopAliasScopes);
26312646
26322647 // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
26332648 // multiple times on the same object. If the ptr is accessed twice, once
@@ -2691,7 +2706,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
26912706 bool IsReadOnlyPtr = false ;
26922707 Type *AccessTy = getLoadStoreType (LD);
26932708 if (Seen.insert ({Ptr, AccessTy}).second ||
2694- !getPtrStride (*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides)) {
2709+ !getPtrStride (*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false ,
2710+ true )) {
26952711 ++NumReads;
26962712 IsReadOnlyPtr = true ;
26972713 }
0 commit comments