3131#include " llvm/Analysis/ScalarEvolution.h"
3232#include " llvm/Analysis/ScalarEvolutionExpressions.h"
3333#include " llvm/Analysis/TargetLibraryInfo.h"
34+ #include " llvm/Analysis/TargetTransformInfo.h"
3435#include " llvm/Analysis/ValueTracking.h"
3536#include " llvm/Analysis/VectorUtils.h"
3637#include " llvm/IR/BasicBlock.h"
@@ -2122,32 +2123,34 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
21222123 return Dependence::Forward;
21232124 }
21242125
2125- if (!C) {
2126- // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
2127- // to consider retrying with runtime checks. Historically, we did not set it
2128- // when strides were different but there is no inherent reason to.
2126+ int64_t MinDistance = SE.getSignedRangeMin (Dist).getSExtValue ();
2127+ // Below we only handle strictly positive distances.
2128+ if (MinDistance <= 0 ) {
21292129 FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2130- LLVM_DEBUG (dbgs () << " LAA: Dependence because of non-constant distance\n " );
21312130 return Dependence::Unknown;
21322131 }
21332132
2134- if (!SE.isKnownPositive (Dist))
2135- return Dependence::Unknown;
2133+ if (!isa<SCEVConstant>(Dist)) {
2134+ // Previously this case would be treated as Unknown, possibly setting
2135+ // FoundNonConstantDistanceDependence to force re-trying with runtime
2136+ // checks. Until the TODO below is addressed, set it here to preserve
2137+ // original behavior w.r.t. re-trying with runtime checks.
2138+ // TODO: FoundNonConstantDistanceDependence is used as a necessary
2139+ // condition to consider retrying with runtime checks. Historically, we
2140+ // did not set it when strides were different but there is no inherent
2141+ // reason to.
2142+ FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2143+ }
21362144
21372145 if (!HasSameSize) {
21382146 LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
21392147 " different type sizes\n " );
21402148 return Dependence::Unknown;
21412149 }
21422150
2143- // The logic below currently only supports StrideA == StrideB, i.e. there's a
2144- // common stride.
21452151 if (!CommonStride)
21462152 return Dependence::Unknown;
21472153
2148- const APInt &Val = C->getAPInt ();
2149- int64_t Distance = Val.getSExtValue ();
2150-
21512154 // Bail out early if passed-in parameters make vectorization not feasible.
21522155 unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
21532156 VectorizerParams::VectorizationFactor : 1 );
@@ -2172,8 +2175,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
21722175 // | A[0] | | A[2] | | A[4] | | A[6] | |
21732176 // | B[0] | | B[2] | | B[4] |
21742177 //
2175- // Distance needs for vectorizing iterations except the last iteration:
2176- // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
2178+ // Minimum distance needed to vectorize all iterations except the last:
2179+ // 4 * 2 * (MinNumIter - 1). Minimum distance needed for the last iteration: 4.
21772180 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
21782181 //
21792182 // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
@@ -2182,11 +2185,22 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
21822185 // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
21832186 // the minimum distance needed is 28, which is greater than distance. It is
21842187 // not safe to do vectorization.
2188+
2189+ // We know that Dist is positive, but it may not be constant. Use the signed
2190+ // minimum for computations below, as this ensures we compute the closest
2191+ // possible dependence distance.
21852192 uint64_t MinDistanceNeeded =
2186- TypeByteSize * (*CommonStride) * (MinNumIter - 1 ) + TypeByteSize;
2187- if (MinDistanceNeeded > static_cast <uint64_t >(Distance)) {
2188- LLVM_DEBUG (dbgs () << " LAA: Failure because of positive distance "
2189- << Distance << ' \n ' );
2193+ TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2194+ if (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
2195+ if (!isa<SCEVConstant>(Dist)) {
2196+ // For non-constant distances, we checked the lower bound of the
2197+ // dependence distance and the distance may be larger at runtime (and safe
2198+ // for vectorization). Classify it as Unknown, so we re-try with runtime
2199+ // checks.
2200+ return Dependence::Unknown;
2201+ }
2202+ LLVM_DEBUG (dbgs () << " LAA: Failure because of positive minimum distance "
2203+ << MinDistance << ' \n ' );
21902204 return Dependence::Backward;
21912205 }
21922206
@@ -2215,12 +2229,13 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
22152229 // is 8, which is less than 2 and forbids vectorization. But actually
22162230 // both A and B could be vectorized by 2 iterations.
22172231 MinDepDistBytes =
2218- std::min (static_cast <uint64_t >(Distance ), MinDepDistBytes);
2232+ std::min (static_cast <uint64_t >(MinDistance ), MinDepDistBytes);
22192233
22202234 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
22212235 uint64_t MinDepDistBytesOld = MinDepDistBytes;
22222236 if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2223- couldPreventStoreLoadForward (Distance, TypeByteSize)) {
2237+ isa<SCEVConstant>(Dist) &&
2238+ couldPreventStoreLoadForward (MinDistance, TypeByteSize)) {
22242239 // Sanity check that we didn't update MinDepDistBytes when calling
22252240 // couldPreventStoreLoadForward
22262241 assert (MinDepDistBytes == MinDepDistBytesOld &&
@@ -2232,10 +2247,18 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
22322247
22332248 // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
22342249 // since there is a backwards dependency.
2235- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * ( *CommonStride) );
2236- LLVM_DEBUG (dbgs () << " LAA: Positive distance " << Val. getSExtValue ()
2250+ uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride);
2251+ LLVM_DEBUG (dbgs () << " LAA: Positive min distance " << MinDistance
22372252 << " with max VF = " << MaxVF << ' \n ' );
2253+
22382254 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8 ;
2255+ if (!isa<SCEVConstant>(Dist) && MaxVFInBits < MaxTargetVectorWidthInBits) {
2256+ // For non-constant distances, we checked the lower bound of the dependence
2257+ // distance and the distance may be larger at runtime (and safe for
2258+ // vectorization). Classify it as Unknown, so we re-try with runtime checks.
2259+ return Dependence::Unknown;
2260+ }
2261+
22392262 MaxSafeVectorWidthInBits = std::min (MaxSafeVectorWidthInBits, MaxVFInBits);
22402263 return Dependence::BackwardVectorizable;
22412264}
@@ -3018,11 +3041,28 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
30183041}
30193042
30203043LoopAccessInfo::LoopAccessInfo (Loop *L, ScalarEvolution *SE,
3044+ const TargetTransformInfo *TTI,
30213045 const TargetLibraryInfo *TLI, AAResults *AA,
30223046 DominatorTree *DT, LoopInfo *LI)
30233047 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3024- PtrRtChecking (nullptr ),
3025- DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
3048+ PtrRtChecking (nullptr ), TheLoop(L) {
3049+ unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
3050+ if (TTI) {
3051+ TypeSize FixedWidth =
3052+ TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector);
3053+ if (FixedWidth.isNonZero ()) {
3054+ // Scale the vector width by 2 as a rough estimate to also consider
3055+ // interleaving.
3056+ MaxTargetVectorWidthInBits = FixedWidth.getFixedValue () * 2 ;
3057+ }
3058+
3059+ TypeSize ScalableWidth =
3060+ TTI->getRegisterBitWidth (TargetTransformInfo::RGK_ScalableVector);
3061+ if (ScalableWidth.isNonZero ())
3062+ MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
3063+ }
3064+ DepChecker =
3065+ std::make_unique<MemoryDepChecker>(*PSE, L, MaxTargetVectorWidthInBits);
30263066 PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
30273067 if (canAnalyzeLoop ()) {
30283068 analyzeLoop (AA, LI, TLI, DT);
@@ -3082,7 +3122,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
30823122
30833123 if (I.second )
30843124 I.first ->second =
3085- std::make_unique<LoopAccessInfo>(&L, &SE, TLI, &AA, &DT, &LI);
3125+ std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);
30863126
30873127 return *I.first ->second ;
30883128}
@@ -3111,8 +3151,9 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
31113151 auto &AA = FAM.getResult <AAManager>(F);
31123152 auto &DT = FAM.getResult <DominatorTreeAnalysis>(F);
31133153 auto &LI = FAM.getResult <LoopAnalysis>(F);
3154+ auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
31143155 auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
3115- return LoopAccessInfoManager (SE, AA, DT, LI, &TLI);
3156+ return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, & TLI);
31163157}
31173158
31183159AnalysisKey LoopAccessAnalysis::Key;
0 commit comments