@@ -1088,29 +1088,49 @@ static void findForkedSCEVs(
10881088  }
10891089}
10901090
1091- static  SmallVector<PointerIntPair<const  SCEV *, 1 , bool >>
1092- findForkedPointer (PredicatedScalarEvolution &PSE,
1093-                   const  DenseMap<Value *, const  SCEV *> &StridesMap, Value *Ptr,
1094-                   const  Loop *L) {
1095-   ScalarEvolution *SE = PSE.getSE ();
1096-   assert (SE->isSCEVable (Ptr->getType ()) && " Value is not SCEVable!"  );
1097-   SmallVector<PointerIntPair<const  SCEV *, 1 , bool >> Scevs;
1098-   findForkedSCEVs (SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
1099- 
1100-   //  For now, we will only accept a forked pointer with two possible SCEVs
1101-   //  that are either SCEVAddRecExprs or loop invariant.
1102-   if  (Scevs.size () == 2  &&
1103-       (isa<SCEVAddRecExpr>(get<0 >(Scevs[0 ])) ||
1104-        SE->isLoopInvariant (get<0 >(Scevs[0 ]), L)) &&
1105-       (isa<SCEVAddRecExpr>(get<0 >(Scevs[1 ])) ||
1106-        SE->isLoopInvariant (get<0 >(Scevs[1 ]), L))) {
1107-     LLVM_DEBUG (dbgs () << " LAA: Found forked pointer: "   << *Ptr << " \n "  );
1108-     LLVM_DEBUG (dbgs () << " \t (1) "   << *get<0 >(Scevs[0 ]) << " \n "  );
1109-     LLVM_DEBUG (dbgs () << " \t (2) "   << *get<0 >(Scevs[1 ]) << " \n "  );
1110-     return  Scevs;
1111-   }
1112- 
1113-   return  {{replaceSymbolicStrideSCEV (PSE, StridesMap, Ptr), false }};
1091+ // / Given \p ForkedSCEVs corresponding to \p Ptr, get AddRecs from \p Assume and
1092+ // / \p StridesMap, and return SCEVs that could potentially be checked at runtime
1093+ // / (AddRecs and loop-invariants). Returns an empty range as an early exit.
1094+ static  iterator_range<PointerIntPair<const  SCEV *, 1 , bool > *> getRTCheckPtrs (
1095+     PredicatedScalarEvolution &PSE, const  Loop *L, Value *Ptr,
1096+     MutableArrayRef<PointerIntPair<const  SCEV *, 1 , bool >> ForkedSCEVs,
1097+     const  DenseMap<Value *, const  SCEV *> &StridesMap, bool  Assume) {
1098+   for  (auto  &P : ForkedSCEVs) {
1099+     auto  *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer ());
1100+     if  (!AR && Assume)
1101+       AR = PSE.getAsAddRec (Ptr);
1102+ 
1103+     //  Call replaceSymbolicStrideSCEV only after PSE.getAsAddRec, because
1104+     //  assumptions might have been added to PSE, resulting in simplifications.
1105+     const  SCEV *S = replaceSymbolicStrideSCEV (PSE, StridesMap, Ptr);
1106+     auto  *SAR = dyn_cast<SCEVAddRecExpr>(S);
1107+ 
1108+     if  (auto  *PtrVal = SAR ? SAR : AR; PtrVal && PtrVal->isAffine ())
1109+       P.setPointer (PtrVal);
1110+     else  if  (!PSE.getSE ()->isLoopInvariant (P.getPointer (), L))
1111+       return  {ForkedSCEVs.end (), ForkedSCEVs.end ()};
1112+   }
1113+ 
1114+   //  De-duplicate the ForkedSCEVs. If two SCEVs are equal, prefer the SCEV that
1115+   //  doesn't need freeze.
1116+   auto  PtrEq = [](const  auto  &P, const  auto  &Q) {
1117+     return  get<0 >(P) == get<0 >(Q);
1118+   };
1119+   auto  FreezeLess = [PtrEq](const  auto  &P, const  auto  &Q) {
1120+     return  PtrEq (P, Q) && get<1 >(P) < get<1 >(Q);
1121+   };
1122+   stable_sort (ForkedSCEVs, FreezeLess);
1123+   auto  UniqPtrs = make_range (ForkedSCEVs.begin (), unique (ForkedSCEVs, PtrEq));
1124+ 
1125+   if  (size (UniqPtrs) == 1 ) {
1126+     //  FIXME: Is this correct?
1127+     UniqPtrs.begin ()->setInt (false );
1128+     return  UniqPtrs;
1129+   }
1130+   LLVM_DEBUG (dbgs () << " LAA: Found forked pointer: "   << *Ptr << " \n "  );
1131+   for  (auto  [Idx, P] : enumerate(UniqPtrs))
1132+     LLVM_DEBUG (dbgs () << " \t ("   << Idx << " ) "   << *P.getPointer () << " \n "  );
1133+   return  UniqPtrs;
11141134}
11151135
11161136bool  AccessAnalysis::createCheckForAccess (
@@ -1119,42 +1139,25 @@ bool AccessAnalysis::createCheckForAccess(
11191139    DenseMap<Value *, unsigned > &DepSetId, Loop *TheLoop,
11201140    unsigned  &RunningDepId, unsigned  ASId, bool  Assume) {
11211141  Value *Ptr = Access.getPointer ();
1142+   ScalarEvolution *SE = PSE.getSE ();
1143+   assert (SE->isSCEVable (Ptr->getType ()) && " Value is not SCEVable!"  );
11221144
1123-   SmallVector<PointerIntPair<const  SCEV *, 1 , bool >> TranslatedPtrs =
1124-       findForkedPointer (PSE, StridesMap, Ptr, TheLoop);
1125-   assert (!TranslatedPtrs.empty () && " must have some translated pointers"  );
1126- 
1127-   // / Check whether all pointers can participate in a runtime bounds check. They
1128-   // / must either be invariant or AddRecs. If ShouldCheckWrap is true, they also
1129-   // / must not wrap.
1130-   for  (auto  &P : TranslatedPtrs) {
1131-     //  The bounds for loop-invariant pointer is trivial.
1132-     if  (PSE.getSE ()->isLoopInvariant (P.getPointer (), TheLoop))
1133-       continue ;
1134- 
1135-     const  SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer ());
1136-     if  (!AR && Assume)
1137-       AR = PSE.getAsAddRec (Ptr);
1138-     if  (!AR || !AR->isAffine ())
1139-       return  false ;
1140- 
1141-     //  If there's only one option for Ptr, look it up after bounds and wrap
1142-     //  checking, because assumptions might have been added to PSE.
1143-     if  (TranslatedPtrs.size () == 1 ) {
1144-       AR =
1145-           cast<SCEVAddRecExpr>(replaceSymbolicStrideSCEV (PSE, StridesMap, Ptr));
1146-       P.setPointer (AR);
1147-     }
1148- 
1149-     //  When we run after a failing dependency check we have to make sure
1150-     //  we don't have wrapping pointers.
1151-     if  (!isNoWrap (PSE, AR, TranslatedPtrs.size () == 1  ? Ptr : nullptr , AccessTy,
1152-                   TheLoop, Assume)) {
1153-       return  false ;
1154-     }
1155-   }
1145+   //  Find the ForkedSCEVs, and prepare the runtime-check pointers.
1146+   SmallVector<PointerIntPair<const  SCEV *, 1 , bool >> ForkedSCEVs;
1147+   findForkedSCEVs (SE, TheLoop, Ptr, ForkedSCEVs, MaxForkedSCEVDepth);
1148+   auto  RTCheckPtrs =
1149+       getRTCheckPtrs (PSE, TheLoop, Ptr, ForkedSCEVs, StridesMap, Assume);
1150+ 
1151+   // / Check whether all pointers can participate in a runtime bounds check: they
1152+   // / must either be loop-invariant, or an affine AddRec that does not wrap.
1153+   if  (!size (RTCheckPtrs) || any_of (RTCheckPtrs, [&](const  auto  &P) {
1154+         auto  *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer ());
1155+         return  AR && !isNoWrap (PSE, AR, size (RTCheckPtrs) == 1  ? Ptr : nullptr ,
1156+                                AccessTy, TheLoop, Assume);
1157+       }))
1158+     return  false ;
11561159
1157-   for  (auto  [PtrExpr, NeedsFreeze] : TranslatedPtrs ) {
1160+   for  (auto  [PtrExpr, NeedsFreeze] : RTCheckPtrs ) {
11581161    //  The id of the dependence set.
11591162    unsigned  DepId;
11601163
0 commit comments