2929#include " llvm/Analysis/LoopInfo.h"
3030#include " llvm/Analysis/LoopIterator.h"
3131#include " llvm/Analysis/MemoryLocation.h"
32+ #include " llvm/Analysis/MemorySSA.h"
3233#include " llvm/Analysis/OptimizationRemarkEmitter.h"
3334#include " llvm/Analysis/ScalarEvolution.h"
3435#include " llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -1777,6 +1778,232 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
17771778 return Diff == 1 ;
17781779}
17791780
1781+ // / Collects all subexpressions that appear within a given SCEV tree.
1782+ struct SCEVSubexprCollector : public SCEVVisitor <SCEVSubexprCollector, void > {
1783+ SmallPtrSet<const SCEV *, 4 > &Subs;
1784+ SCEVSubexprCollector (SmallPtrSet<const SCEV *, 4 > &S) : Subs(S) {}
1785+
1786+ template <typename Operands> void visitOperands (Operands operands) {
1787+ for (auto *Op : operands)
1788+ visit (Op);
1789+ }
1790+ void visitConstant (const SCEVConstant *C) { Subs.insert (C); }
1791+ void visitUnknown (const SCEVUnknown *U) { Subs.insert (U); }
1792+ void visitAddExpr (const SCEVAddExpr *E) {
1793+ Subs.insert (E);
1794+ for (auto *Op : E->operands ())
1795+ visit (Op);
1796+ }
1797+ void visitMulExpr (const SCEVMulExpr *E) {
1798+ Subs.insert (E);
1799+ for (auto *Op : E->operands ())
1800+ visit (Op);
1801+ }
1802+ void visitAddRecExpr (const SCEVAddRecExpr *E) {
1803+ Subs.insert (E);
1804+ for (auto *Op : E->operands ())
1805+ visit (Op);
1806+ }
1807+ void visitSMaxExpr (const SCEVSMaxExpr *E) {
1808+ Subs.insert (E);
1809+ for (auto *Op : E->operands ())
1810+ visit (Op);
1811+ }
1812+ void visitSMinExpr (const SCEVSMinExpr *E) {
1813+ Subs.insert (E);
1814+ for (auto *Op : E->operands ())
1815+ visit (Op);
1816+ }
1817+ void visitUMinExpr (const SCEVUMinExpr *E) {
1818+ Subs.insert (E);
1819+ for (auto *Op : E->operands ())
1820+ visit (Op);
1821+ }
1822+ void visitUMaxExpr (const SCEVUMaxExpr *E) {
1823+ Subs.insert (E);
1824+ for (auto *Op : E->operands ())
1825+ visit (Op);
1826+ }
1827+ void visitMinMaxExpr (const SCEVMinMaxExpr *E) {
1828+ Subs.insert (E);
1829+ for (auto *Op : E->operands ())
1830+ visit (Op);
1831+ }
1832+ void visitUDivExpr (const SCEVUDivExpr *E) {
1833+ Subs.insert (E);
1834+ visit (E->getLHS ());
1835+ visit (E->getRHS ());
1836+ }
1837+ void visitZeroExtendExpr (const SCEVZeroExtendExpr *E) {
1838+ Subs.insert (E);
1839+ visit (E->getOperand ());
1840+ }
1841+ void visitSignExtendExpr (const SCEVSignExtendExpr *E) {
1842+ Subs.insert (E);
1843+ visit (E->getOperand ());
1844+ }
1845+ void visitTruncateExpr (const SCEVTruncateExpr *E) {
1846+ Subs.insert (E);
1847+ visit (E->getOperand ());
1848+ }
1849+ void visitCouldNotCompute (const SCEVCouldNotCompute *E) { Subs.insert (E); }
1850+ void visitVScale (const SCEVVScale *E) {
1851+ Subs.insert (E);
1852+ visitOperands (E->operands ());
1853+ }
1854+ void visitPtrToIntExpr (const SCEVPtrToIntExpr *E) {
1855+ Subs.insert (E);
1856+ visitOperands (E->operands ());
1857+ }
1858+ void visitSequentialUMinExpr (const SCEVSequentialUMinExpr *E) {
1859+ Subs.insert (E);
1860+ visitOperands (E->operands ());
1861+ }
1862+ };
1863+
1864+ bool MemoryDepChecker::isInvariantLoadHoistable (
1865+ LoadInst *L, ScalarEvolution &SE, StoreInst **S, const SCEV **StepSCEV,
1866+ SmallVectorImpl<Instruction *> *Instructions) const {
1867+ assert (L != nullptr );
1868+ assert (InnermostLoop->isLoopInvariant (L->getPointerOperand ()));
1869+
1870+ if (!MSSA)
1871+ return false ;
1872+
1873+ MemoryAccess *MA = MSSA->getMemoryAccess (L);
1874+ auto QLoc = MemoryLocation::get (L);
1875+
1876+ SmallVector<StoreInst *> Stores;
1877+ SmallVector<LoadInst *> Loads;
1878+
1879+ for (auto &&I : *InnermostLoop->getHeader ()) {
1880+ if (auto *Store = dyn_cast<StoreInst>(&I)) {
1881+ AliasResult AR = AA->alias (MemoryLocation::get (Store), QLoc);
1882+ if (AR == AliasResult::MustAlias)
1883+ Stores.push_back (Store);
1884+ }
1885+ if (auto *Load = dyn_cast<LoadInst>(&I)) {
1886+ AliasResult AR = AA->alias (MemoryLocation::get (Load), QLoc);
1887+ if (AR == AliasResult::MustAlias)
1888+ Loads.push_back (Load);
1889+ }
1890+ }
1891+
1892+ if (Loads.size () != 1 || Loads[0 ]->isVolatile () || Stores.size () != 1 ||
1893+ Stores[0 ]->isVolatile ())
1894+ return false ;
1895+
1896+ // I have the memory PHI, so I know where is the backedge
1897+ // I have to find all memory accesses to the same cell (that I care)
1898+ // There should be a single memory use and a single memorydef
1899+ // memory use should have MemoryPhi as transitive clobber
1900+ // backedge should have the MemoryDef as a transitive clobber (must-alias) (?)
1901+ MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (MA);
1902+ while (auto *MD = dyn_cast<MemoryUseOrDef>(Clobber)) {
1903+ Instruction *DefI = MD->getMemoryInst ();
1904+
1905+ if (!DefI)
1906+ return false ;
1907+
1908+ AliasResult AR = AA->alias (MemoryLocation::get (DefI), QLoc);
1909+
1910+ Clobber = MD->getDefiningAccess ();
1911+
1912+ // We assume runtime aliasing check will be used
1913+ if (AR == AliasResult::MustAlias)
1914+ return false ;
1915+ }
1916+
1917+ MemoryAccess *MS = MSSA->getMemoryAccess (Stores[0 ]);
1918+ MemoryAccess *StoreClobber = MSSA->getWalker ()->getClobberingMemoryAccess (MS);
1919+ while (true ) {
1920+ if (isa<MemoryPhi>(StoreClobber))
1921+ break ;
1922+ if (auto *MD = dyn_cast<MemoryUseOrDef>(StoreClobber)) {
1923+ Instruction *DefI = MD->getMemoryInst ();
1924+
1925+ if (!DefI)
1926+ return false ;
1927+
1928+ AliasResult AR = AA->alias (MemoryLocation::get (DefI), QLoc);
1929+
1930+ StoreClobber = MD->getDefiningAccess ();
1931+
1932+ if (AR == AliasResult::MustAlias)
1933+ return false ;
1934+ }
1935+ }
1936+
1937+ if (!SE.isSCEVable (Stores[0 ]->getValueOperand ()->getType ()))
1938+ return false ;
1939+
1940+ const SCEV *LoadSCEV = SE.getUnknown (L);
1941+ const SCEV *StoreSCEV = SE.getSCEV (Stores[0 ]->getValueOperand ());
1942+
1943+ auto Step = SE.getMinusSCEV (StoreSCEV, LoadSCEV);
1944+
1945+ if (isa<SCEVCouldNotCompute>(Step) ||
1946+ !SE.isLoopInvariant (Step, InnermostLoop))
1947+ return false ;
1948+
1949+ SmallVector<Instruction *, 4 > WL;
1950+
1951+ SmallPtrSet<Instruction *, 4 > Slice;
1952+ SmallPtrSet<const SCEV *, 4 > Subs;
1953+ SCEVSubexprCollector Collector (Subs);
1954+ Collector.visit (StoreSCEV);
1955+
1956+ // Register all instructions that matches the SCEV
1957+ // to allow its removal when hoisting it and
1958+ // re-expanding the SCEV
1959+ auto enqueueIfMatches = [&](Value *X) {
1960+ if (auto *XI = dyn_cast<Instruction>(X)) {
1961+ const SCEV *SX = SE.getSCEV (XI);
1962+ if (Subs.contains (SX) && Slice.insert (XI).second )
1963+ WL.push_back (XI);
1964+ }
1965+ };
1966+
1967+ enqueueIfMatches (Stores[0 ]->getValueOperand ());
1968+
1969+ while (!WL.empty ()) {
1970+ Instruction *I = WL.pop_back_val ();
1971+
1972+ for (Value *Op : I->operands ()) {
1973+ if (isa<Constant>(Op) || isa<Argument>(Op))
1974+ continue ;
1975+ enqueueIfMatches (Op);
1976+ }
1977+ }
1978+
1979+ auto hasExternalUsers =
1980+ [&Stores](const SmallPtrSetImpl<Instruction *> &Slice) {
1981+ for (Instruction *I : Slice)
1982+ for (Use &U : I->uses ())
1983+ if (auto *UserI = dyn_cast<Instruction>(U.getUser ())) {
1984+ if (isa<DbgInfoIntrinsic>(UserI))
1985+ continue ;
1986+ if (!Slice.count (UserI) &&
1987+ !std::count (Stores.begin (), Stores.end (), UserI))
1988+ return true ;
1989+ }
1990+ return false ;
1991+ };
1992+
1993+ if (hasExternalUsers (Slice))
1994+ return false ;
1995+
1996+ if (S)
1997+ *S = Stores[0 ];
1998+ if (StepSCEV)
1999+ *StepSCEV = Step;
2000+
2001+ if (Instructions)
2002+ Instructions->insert (Instructions->end (), Slice.begin (), Slice.end ());
2003+
2004+ return true ;
2005+ }
2006+
17802007void MemoryDepChecker::addAccess (StoreInst *SI) {
17812008 visitPointers (SI->getPointerOperand (), *InnermostLoop,
17822009 [this , SI](Value *Ptr) {
@@ -2505,7 +2732,7 @@ bool LoopAccessInfo::canAnalyzeLoop() {
25052732
25062733bool LoopAccessInfo::analyzeLoop (AAResults *AA, const LoopInfo *LI,
25072734 const TargetLibraryInfo *TLI,
2508- DominatorTree *DT) {
2735+ DominatorTree *DT, MemorySSA *MSSA ) {
25092736 // Holds the Load and Store instructions.
25102737 SmallVector<LoadInst *, 16 > Loads;
25112738 SmallVector<StoreInst *, 16 > Stores;
@@ -3064,7 +3291,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30643291 const TargetTransformInfo *TTI,
30653292 const TargetLibraryInfo *TLI, AAResults *AA,
30663293 DominatorTree *DT, LoopInfo *LI,
3067- AssumptionCache *AC, bool AllowPartial)
3294+ AssumptionCache *AC, MemorySSA *MSSA,
3295+ bool AllowPartial)
30683296 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
30693297 PtrRtChecking (nullptr ), TheLoop(L), AllowPartial(AllowPartial) {
30703298 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
@@ -3075,11 +3303,12 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30753303 TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector) * 2 ;
30763304
30773305 DepChecker = std::make_unique<MemoryDepChecker>(
3078- *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3306+ *PSE, AC, MSSA, DT, AA, L, SymbolicStrides, MaxTargetVectorWidthInBits,
3307+ LoopGuards);
30793308 PtrRtChecking =
30803309 std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
30813310 if (canAnalyzeLoop ())
3082- CanVecMem = analyzeLoop (AA, LI, TLI, DT);
3311+ CanVecMem = analyzeLoop (AA, LI, TLI, DT, MSSA );
30833312}
30843313
30853314void LoopAccessInfo::print (raw_ostream &OS, unsigned Depth) const {
@@ -3145,7 +3374,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
31453374 // or if it was created with a different value of AllowPartial.
31463375 if (Inserted || It->second ->hasAllowPartial () != AllowPartial)
31473376 It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3148- &LI, AC, AllowPartial);
3377+ &LI, AC, MSSA, AllowPartial);
31493378
31503379 return *It->second ;
31513380}
@@ -3189,7 +3418,9 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
31893418 auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
31903419 auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
31913420 auto &AC = FAM.getResult <AssumptionAnalysis>(F);
3192- return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI, &AC);
3421+ auto &MSSA = FAM.getResult <MemorySSAAnalysis>(F);
3422+ return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, &TLI, &AC,
3423+ &MSSA.getMSSA ());
31933424}
31943425
31953426AnalysisKey LoopAccessAnalysis::Key;
0 commit comments