1414#include " llvm/Transforms/Scalar/MemCpyOptimizer.h"
1515#include " llvm/ADT/DenseSet.h"
1616#include " llvm/ADT/STLExtras.h"
17+ #include " llvm/ADT/ScopeExit.h"
1718#include " llvm/ADT/SmallVector.h"
1819#include " llvm/ADT/Statistic.h"
1920#include " llvm/ADT/iterator_range.h"
@@ -1121,28 +1122,67 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
11211122bool MemCpyOptPass::processMemCpyMemCpyDependence (MemCpyInst *M,
11221123 MemCpyInst *MDep,
11231124 BatchAAResults &BAA) {
1124- // We can only transforms memcpy's where the dest of one is the source of the
1125- // other.
1126- if (M->getSource () != MDep->getDest () || MDep->isVolatile ())
1127- return false ;
1128-
11291125 // If dep instruction is reading from our current input, then it is a noop
1130- // transfer and substituting the input won't change this instruction. Just
1131- // ignore the input and let someone else zap MDep. This handles cases like:
1126+ // transfer and substituting the input won't change this instruction. Just
1127+ // ignore the input and let someone else zap MDep. This handles cases like:
11321128 // memcpy(a <- a)
11331129 // memcpy(b <- a)
11341130 if (M->getSource () == MDep->getSource ())
11351131 return false ;
11361132
1137- // Second, the length of the memcpy's must be the same, or the preceding one
1133+ // We can only optimize non-volatile memcpy's.
1134+ if (MDep->isVolatile ())
1135+ return false ;
1136+
1137+ int64_t MForwardOffset = 0 ;
1138+ const DataLayout &DL = M->getModule ()->getDataLayout ();
1139+ // We can only transform memcpy's where the dest of one is the source of the
1140+ // other, or where that source is at a non-negative offset into the dest.
1141+ if (M->getSource () != MDep->getDest ()) {
1142+ std::optional<int64_t > Offset =
1143+ M->getSource ()->getPointerOffsetFrom (MDep->getDest (), DL);
1144+ if (!Offset || *Offset < 0 )
1145+ return false ;
1146+ MForwardOffset = *Offset;
1147+ }
1148+
1149+ // The length of the memcpy's must be the same, or the preceding one
11381150 // must be larger than the following one.
1139- if (MDep->getLength () != M->getLength ()) {
1151+ if (MForwardOffset != 0 || ( MDep->getLength () != M->getLength () )) {
11401152 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength ());
11411153 auto *MLen = dyn_cast<ConstantInt>(M->getLength ());
1142- if (!MDepLen || !MLen || MDepLen->getZExtValue () < MLen->getZExtValue ())
1154+ if (!MDepLen || !MLen ||
1155+ MDepLen->getZExtValue () < MLen->getZExtValue () + MForwardOffset)
11431156 return false ;
11441157 }
11451158
1159+ IRBuilder<> Builder (M);
1160+ auto *CopySource = MDep->getRawSource ();
1161+ auto CleanupOnFailure = llvm::make_scope_exit ([&CopySource] {
1162+ if (CopySource->use_empty ())
1163+ cast<Instruction>(CopySource)->eraseFromParent ();
1164+ });
1165+ MaybeAlign CopySourceAlign = MDep->getSourceAlign ();
1166+ // We just need to calculate the actual size of the copy.
1167+ auto MCopyLoc = MemoryLocation::getForSource (MDep).getWithNewSize (
1168+ MemoryLocation::getForSource (M).Size );
1169+
1170+ // We need to update `MCopyLoc` if an offset exists.
1171+ if (MForwardOffset > 0 ) {
1172+ // The copy destination of `M` may itself be usable as the copy source.
1173+ std::optional<int64_t > MDestOffset =
1174+ M->getRawDest ()->getPointerOffsetFrom (MDep->getRawSource (), DL);
1175+ if (MDestOffset && *MDestOffset == MForwardOffset)
1176+ CopySource = M->getRawDest ();
1177+ else
1178+ CopySource = Builder.CreateInBoundsPtrAdd (
1179+ CopySource, ConstantInt::get (Type::getInt64Ty (Builder.getContext ()),
1180+ MForwardOffset));
1181+ MCopyLoc = MCopyLoc.getWithNewPtr (CopySource);
1182+ if (CopySourceAlign)
1183+ CopySourceAlign = commonAlignment (*CopySourceAlign, MForwardOffset);
1184+ }
1185+
11461186 // Verify that the copied-from memory doesn't change in between the two
11471187 // transfers. For example, in:
11481188 // memcpy(a <- b)
@@ -1152,10 +1192,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11521192 //
11531193 // TODO: If the code between M and MDep is transparent to the destination "c",
11541194 // then we could still perform the xform by moving M up to the first memcpy.
1155- // TODO: It would be sufficient to check the MDep source up to the memcpy
1156- // size of M, rather than MDep.
1157- if (writtenBetween (MSSA, BAA, MemoryLocation::getForSource (MDep),
1158- MSSA->getMemoryAccess (MDep), MSSA->getMemoryAccess (M)))
1195+ if (writtenBetween (MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess (MDep),
1196+ MSSA->getMemoryAccess (M)))
11591197 return false ;
11601198
11611199 // If the dest of the second might alias the source of the first, then the
@@ -1179,23 +1217,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11791217
11801218 // TODO: Is this worth it if we're creating a less aligned memcpy? For
11811219 // example we could be moving from movaps -> movq on x86.
1182- IRBuilder<> Builder (M);
11831220 Instruction *NewM;
11841221 if (UseMemMove)
1185- NewM = Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (),
1186- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1187- M->getLength (), M->isVolatile ());
1222+ NewM =
1223+ Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1224+ CopySourceAlign, M->getLength (), M->isVolatile ());
11881225 else if (isa<MemCpyInlineInst>(M)) {
11891226 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
11901227 // never allowed since that would allow the latter to be lowered as a call
11911228 // to an external function.
1192- NewM = Builder.CreateMemCpyInline (
1193- M-> getRawDest (), M-> getDestAlign (), MDep-> getRawSource () ,
1194- MDep-> getSourceAlign (), M->getLength (), M->isVolatile ());
1229+ NewM = Builder.CreateMemCpyInline (M-> getRawDest (), M-> getDestAlign (),
1230+ CopySource, CopySourceAlign ,
1231+ M->getLength (), M->isVolatile ());
11951232 } else
1196- NewM = Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (),
1197- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1198- M->getLength (), M->isVolatile ());
1233+ NewM =
1234+ Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1235+ CopySourceAlign, M->getLength (), M->isVolatile ());
11991236 NewM->copyMetadata (*M, LLVMContext::MD_DIAssignID);
12001237
12011238 assert (isa<MemoryDef>(MSSAU->getMemorySSA ()->getMemoryAccess (M)));
0 commit comments