@@ -1367,8 +1367,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
13671367 return true ;
13681368}
13691369
1370- // / Determine whether the instruction has undefined content for the given Size,
1371- // / either because it was freshly alloca'd or started its lifetime.
1370+ // / Determine whether the pointer V had only undefined content from Def up to
1371+ // / the given Size, either because it was freshly alloca'd or started its
1372+ // / lifetime.
13721373static bool hasUndefContents (MemorySSA *MSSA, BatchAAResults &AA, Value *V,
13731374 MemoryDef *Def, Value *Size) {
13741375 if (MSSA->isLiveOnEntryDef (Def))
@@ -1403,6 +1404,24 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
14031404 return false ;
14041405}
14051406
1407+ static bool coversInputFully (MemorySSA *MSSA, MemCpyInst *MemCpy,
1408+ MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1409+ // If the memcpy is larger than the previous, but the memory was undef prior
1410+ // to that, we can just ignore the tail. Technically we're only
1411+ // interested in the bytes from 0..MemSrcOffset and
1412+ // MemSrcLength+MemSrcOffset..CopySize here, but as we can't easily represent
1413+ // this location, we use the full 0..CopySize range.
1414+ Value *CopySize = MemCpy->getLength ();
1415+ MemoryLocation MemCpyLoc = MemoryLocation::getForSource (MemCpy);
1416+ MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess (MemSrc);
1417+ MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1418+ MemSrcAccess->getDefiningAccess (), MemCpyLoc, BAA);
1419+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1420+ if (hasUndefContents (MSSA, BAA, MemCpy->getSource (), MD, CopySize))
1421+ return true ;
1422+ return false ;
1423+ }
1424+
14061425// / Transform memcpy to memset when its source was just memset.
14071426// / In other words, turn:
14081427// / \code
@@ -1418,51 +1437,63 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
14181437bool MemCpyOptPass::performMemCpyToMemSetOptzn (MemCpyInst *MemCpy,
14191438 MemSetInst *MemSet,
14201439 BatchAAResults &BAA) {
1421- // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1422- // memcpying from the same address. Otherwise it is hard to reason about.
1423- if (!BAA.isMustAlias (MemSet->getRawDest (), MemCpy->getRawSource ()))
1424- return false ;
1425-
14261440 Value *MemSetSize = MemSet->getLength ();
14271441 Value *CopySize = MemCpy->getLength ();
14281442
1429- if (MemSetSize != CopySize) {
1430- // Make sure the memcpy doesn't read any more than what the memset wrote.
1431- // Don't worry about sizes larger than i64.
1432-
1433- // A known memset size is required.
1434- auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1435- if (!CMemSetSize)
1443+ int64_t MOffset = 0 ;
1444+ const DataLayout &DL = MemCpy->getModule ()->getDataLayout ();
1445+ // We can only transform memcpy's where the dest of one is the source of the
1446+ // other, or they have a known offset.
1447+ if (MemCpy->getSource () != MemSet->getDest ()) {
1448+ std::optional<int64_t > Offset =
1449+ MemCpy->getSource ()->getPointerOffsetFrom (MemSet->getDest (), DL);
1450+ if (!Offset)
14361451 return false ;
1452+ MOffset = *Offset;
1453+ }
14371454
1438- // A known memcpy size is also required.
1455+ MaybeAlign MDestAlign = MemCpy->getDestAlign ();
1456+ int64_t MOffsetAligned = MDestAlign.valueOrOne ().value () > 1 && MOffset < 0 ? -(-MOffset & ~(MDestAlign.valueOrOne ().value () - 1 )) : MOffset; // Compute the MOffset that keeps MDest aligned (truncate towards zero)
1457+ if (MOffset != 0 || MemSetSize != CopySize) {
1458+ // Make sure the memcpy doesn't read any more than what the memset wrote, other than undef.
1459+ auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
14391460 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1440- if (!CCopySize)
1441- return false ;
1442- if (CCopySize->getZExtValue () > CMemSetSize->getZExtValue ()) {
1443- // If the memcpy is larger than the memset, but the memory was undef prior
1444- // to the memset, we can just ignore the tail. Technically we're only
1445- // interested in the bytes from MemSetSize..CopySize here, but as we can't
1446- // easily represent this location, we use the full 0..CopySize range.
1447- MemoryLocation MemCpyLoc = MemoryLocation::getForSource (MemCpy);
1448- bool CanReduceSize = false ;
1449- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess (MemSet);
1450- MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1451- MemSetAccess->getDefiningAccess (), MemCpyLoc, BAA);
1452- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1453- if (hasUndefContents (MSSA, BAA, MemCpy->getSource (), MD, CopySize))
1454- CanReduceSize = true ;
1455-
1456- if (!CanReduceSize)
1461+ // Don't worry about sizes larger than i64.
1462+ if (!CMemSetSize || !CCopySize || MOffset < 0 ||
1463+ CCopySize->getZExtValue () + MOffset > CMemSetSize->getZExtValue ()) {
1464+ if (!coversInputFully (MSSA, MemCpy, MemSet, BAA))
14571465 return false ;
1458- CopySize = MemSetSize;
1466+
1467+ if (CMemSetSize && CCopySize) {
1468+ // If both have constant sizes and offsets, clip the memcpy to the bounds of the memset if applicable.
1469+ if (CCopySize->getZExtValue () + std::abs (MOffset) > CMemSetSize->getZExtValue ()) {
1470+ if (MOffsetAligned == 0 || (MOffset < 0 && CCopySize->getZExtValue () + MOffset > CMemSetSize->getZExtValue ()))
1471+ CopySize = MemSetSize;
1472+ else
1473+ CopySize = ConstantInt::get (CopySize->getType (), std::max ((int64_t )0 , (int64_t )(CMemSetSize->getZExtValue () - std::abs (MOffsetAligned))));
1474+ }
1475+ else if (MOffsetAligned < 0 ) {
1476+ // Even if CMemSetSize isn't known, if the MOffsetAligned is negative, make sure to clip the new memset
1477+ CopySize = ConstantInt::get (CopySize->getType (), CCopySize->getZExtValue () + MOffsetAligned);
1478+ }
1479+ }
1480+ else if (CCopySize && MOffsetAligned < 0 ) {
1481+ // Even if CMemSetSize isn't known, if the MOffsetAligned is negative, can still clip the new memset
1482+ CopySize = ConstantInt::get (CopySize->getType (), CCopySize->getZExtValue () + MOffsetAligned);
1483+ }
1484+ else {
1485+ MOffsetAligned = 0 ;
1486+ }
14591487 }
14601488 }
14611489
14621490 IRBuilder<> Builder (MemCpy);
1491+ Value *MDest = MemCpy->getRawDest ();
1492+ if (MOffsetAligned < 0 )
1493+ MDest = Builder.CreateInBoundsPtrAdd (MDest, Builder.getInt64 (-MOffsetAligned));
14631494 Instruction *NewM =
1464- Builder.CreateMemSet (MemCpy-> getRawDest () , MemSet->getOperand (1 ),
1465- CopySize, MemCpy-> getDestAlign () );
1495+ Builder.CreateMemSet (MDest , MemSet->getOperand (1 ),
1496+ CopySize, MDestAlign );
14661497 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess (MemCpy));
14671498 auto *NewAccess = MSSAU->createMemoryAccessAfter (NewM, nullptr , LastDef);
14681499 MSSAU->insertDef (cast<MemoryDef>(NewAccess), /* RenameUses=*/ true );
@@ -1683,7 +1714,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16831714 I->setMetadata (LLVMContext::MD_tbaa_struct, nullptr );
16841715 }
16851716
1686- LLVM_DEBUG (dbgs () << " Stack Move: Performed staack -move optimization\n " );
1717+ LLVM_DEBUG (dbgs () << " Stack Move: Performed stack -move optimization\n " );
16871718 NumStackMove++;
16881719 return true ;
16891720}
0 commit comments