@@ -1366,56 +1366,68 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
13661366
13671367// / Determine whether the pointer V had only undefined content (due to Def) up
13681368// / to the given Size, either because it was freshly alloca'd or started its
1369- // / lifetime.
1370- static bool hasUndefContents (MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1371- MemoryDef *Def, Value *Size) {
1372- if (MSSA->isLiveOnEntryDef (Def))
1373- return isa<AllocaInst>(getUnderlyingObject (V));
1374-
1375- if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst ())) {
1376- if (II->getIntrinsicID () == Intrinsic::lifetime_start) {
1377- auto *LTSize = cast<ConstantInt>(II->getArgOperand (0 ));
1378-
1379- if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1380- if (AA.isMustAlias (V, II->getArgOperand (1 )) &&
1381- LTSize->getZExtValue () >= CSize->getZExtValue ())
1382- return true ;
1383- }
1369+ // / lifetime by walking the MSSA graph.
1370+ static bool hadUndefContentsBefore (MemorySSA *MSSA, BatchAAResults &BAA,
1371+ Value *V, MemoryAccess *Clobber,
1372+ MemoryLocation Loc, Value *Size) {
1373+ while (1 ) {
1374+ Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (Clobber, Loc, BAA);
1375+ MemoryDef *Def = dyn_cast<MemoryDef>(Clobber);
1376+ if (!Def)
1377+ return false ;
1378+
1379+ if (MSSA->isLiveOnEntryDef (Def))
1380+ return isa<AllocaInst>(getUnderlyingObject (V));
1381+
1382+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst ())) {
1383+ if (II->getIntrinsicID () == Intrinsic::lifetime_start) {
1384+ auto *LTSize = cast<ConstantInt>(II->getArgOperand (0 ));
13841385
1385- // If the lifetime.start covers a whole alloca (as it almost always
1386- // does) and we're querying a pointer based on that alloca, then we know
1387- // the memory is definitely undef, regardless of how exactly we alias.
1388- // The size also doesn't matter, as an out-of-bounds access would be UB.
1389- if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject (V))) {
1390- if (getUnderlyingObject (II->getArgOperand (1 )) == Alloca) {
1391- const DataLayout &DL = Alloca->getDataLayout ();
1392- if (std::optional<TypeSize> AllocaSize =
1393- Alloca->getAllocationSize (DL))
1394- if (*AllocaSize == LTSize->getValue ())
1386+ if (Size)
1387+ if (auto CSize = dyn_cast<ConstantInt>(Size))
1388+ if (BAA.isMustAlias (V, II->getArgOperand (1 )) &&
1389+ LTSize->getZExtValue () >= CSize->getZExtValue ())
13951390 return true ;
1391+
1392+ // If the lifetime.start covers a whole alloca (as it almost always
1393+ // does) and we're querying a pointer based on that alloca, then we know
1394+ // the memory is definitely undef, regardless of how exactly we alias.
1395+ // The size also doesn't matter, as an out-of-bounds access would be UB.
1396+ if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject (V))) {
1397+ if (getUnderlyingObject (II->getArgOperand (1 )) == Alloca) {
1398+ const DataLayout &DL = Alloca->getDataLayout ();
1399+ if (std::optional<TypeSize> AllocaSize =
1400+ Alloca->getAllocationSize (DL))
1401+ if (*AllocaSize == LTSize->getValue ())
1402+ return true ;
1403+ }
13961404 }
1405+ Clobber = Def->getDefiningAccess ();
1406+ continue ;
1407+ } else if (II->getIntrinsicID () == Intrinsic::lifetime_end) {
1408+ Clobber = Def->getDefiningAccess ();
1409+ continue ;
13971410 }
13981411 }
1399- }
14001412
1401- return false ;
1413+ return false ;
1414+ }
14021415}
14031416
14041417// If the memcpy is larger than the previous, but the memory was undef prior to
14051418// that, we can just ignore the tail. Technically we're only interested in the
14061419// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1407- // as we can't easily represent this location (hasUndefContents uses mustAlias
1408- // which cannot deal with offsets), we use the full 0..CopySize range.
1420+ // as we can't easily represent this location (hadUndefContentsBefore uses
1421+ // mustAlias which cannot deal with offsets), we use the full 0..CopySize range.
14091422static bool overreadUndefContents (MemorySSA *MSSA, MemCpyInst *MemCpy,
14101423 MemIntrinsic *MemSrc, BatchAAResults &BAA) {
14111424 Value *CopySize = MemCpy->getLength ();
1412- MemoryLocation MemCpyLoc = MemoryLocation::getForSource (MemCpy);
1413- MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess (MemSrc);
1414- MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1415- MemSrcAccess->getDefiningAccess (), MemCpyLoc, BAA);
1416- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1417- if (hasUndefContents (MSSA, BAA, MemCpy->getSource (), MD, CopySize))
1418- return true ;
1425+ MemoryLocation LoadLoc = MemoryLocation::getForSource (MemCpy);
1426+ MemoryAccess *MemSrcAccess =
1427+ MSSA->getMemoryAccess (MemSrc)->getDefiningAccess ();
1428+ if (hadUndefContentsBefore (MSSA, BAA, MemCpy->getSource (), MemSrcAccess,
1429+ LoadLoc, CopySize))
1430+ return true ;
14191431 return false ;
14201432}
14211433
@@ -1573,11 +1585,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15731585 // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
15741586 // practically fill all the bytes of the alloca with an undefined
15751587 // value, although conceptually marked as alive/dead.
1576- int64_t Size = cast<ConstantInt>(UI->getOperand (0 ))->getSExtValue ();
1577- if (Size < 0 || Size == DestSize) {
1578- LifetimeMarkers.push_back (UI);
1579- continue ;
1580- }
1588+ // We don't currently track GEP offsets and sizes, so we don't have
1589+ // a way to check whether this lifetime marker affects the relevant
1590+ // memory regions.
1591+ // While we only really need to delete lifetime.end from Src and
1592+ // lifetime.begin from Dst, those are often implied by the memcpy
1593+ // anyways so hopefully not much is lost by removing all of them.
1594+ LifetimeMarkers.push_back (UI);
1595+ continue ;
15811596 }
15821597 AAMetadataInstrs.insert (UI);
15831598
@@ -1594,9 +1609,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15941609 return true ;
15951610 };
15961611
1597- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
1598- // size lifetime intrinsics. And collect modref inst for the reachability
1599- // check.
1612+ // Check that dest has no Mod/Ref, from the alloca to the Store. And collect
1613+ // modref inst for the reachability check.
16001614 ModRefInfo DestModRef = ModRefInfo::NoModRef;
16011615 MemoryLocation DestLoc (DestAlloca, LocationSize::precise (Size));
16021616 SmallVector<BasicBlock *, 8 > ReachabilityWorklist;
@@ -1779,8 +1793,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
17791793 if (processMemSetMemCpyDependence (M, MDep, BAA))
17801794 return true ;
17811795
1782- MemoryAccess *SrcClobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1783- AnyClobber, MemoryLocation::getForSource (M), BAA);
1796+ MemoryLocation SrcLoc = MemoryLocation::getForSource (M);
1797+ MemoryAccess *SrcClobber =
1798+ MSSA->getWalker ()->getClobberingMemoryAccess (AnyClobber, SrcLoc, BAA);
17841799
17851800 // There are five possible optimizations we can do for memcpy:
17861801 // a) memcpy-memcpy xform which exposes redundance for DSE.
@@ -1820,7 +1835,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
18201835 }
18211836 }
18221837
1823- if (hasUndefContents (MSSA, BAA, M->getSource (), MD, M->getLength ())) {
1838+ if (hadUndefContentsBefore (MSSA, BAA, M->getSource (), AnyClobber, SrcLoc,
1839+ M->getLength ())) {
18241840 LLVM_DEBUG (dbgs () << " Removed memcpy from undef\n " );
18251841 eraseInstruction (M);
18261842 ++NumMemCpyInstr;
0 commit comments