1414#include " llvm/Transforms/Scalar/MemCpyOptimizer.h"
1515#include " llvm/ADT/DenseSet.h"
1616#include " llvm/ADT/STLExtras.h"
17+ #include " llvm/ADT/ScopeExit.h"
1718#include " llvm/ADT/SmallVector.h"
1819#include " llvm/ADT/Statistic.h"
1920#include " llvm/ADT/iterator_range.h"
@@ -1124,28 +1125,67 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
11241125bool MemCpyOptPass::processMemCpyMemCpyDependence (MemCpyInst *M,
11251126 MemCpyInst *MDep,
11261127 BatchAAResults &BAA) {
1127- // We can only transforms memcpy's where the dest of one is the source of the
1128- // other.
1129- if (M->getSource () != MDep->getDest () || MDep->isVolatile ())
1130- return false ;
1131-
11321128 // If dep instruction is reading from our current input, then it is a noop
1133- // transfer and substituting the input won't change this instruction. Just
1134- // ignore the input and let someone else zap MDep. This handles cases like:
1129+ // transfer and substituting the input won't change this instruction. Just
1130+ // ignore the input and let someone else zap MDep. This handles cases like:
11351131 // memcpy(a <- a)
11361132 // memcpy(b <- a)
11371133 if (M->getSource () == MDep->getSource ())
11381134 return false ;
11391135
1140- // Second, the length of the memcpy's must be the same, or the preceding one
1136+ // We can only optimize non-volatile memcpy's.
1137+ if (MDep->isVolatile ())
1138+ return false ;
1139+
1140+ int64_t MForwardOffset = 0 ;
1141+ const DataLayout &DL = M->getModule ()->getDataLayout ();
1142+ // We can only transform memcpy's where the dest of one is the source of the
1143+ // other, or where that source is at a non-negative offset into that dest.
1144+ if (M->getSource () != MDep->getDest ()) {
1145+ std::optional<int64_t > Offset =
1146+ M->getSource ()->getPointerOffsetFrom (MDep->getDest (), DL);
1147+ if (!Offset || *Offset < 0 )
1148+ return false ;
1149+ MForwardOffset = *Offset;
1150+ }
1151+
1152+ // The length of the memcpy's must be the same, or the preceding one
11411153 // must be larger than the following one.
1142- if (MDep->getLength () != M->getLength ()) {
1154+ if (MForwardOffset != 0 || ( MDep->getLength () != M->getLength () )) {
11431155 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength ());
11441156 auto *MLen = dyn_cast<ConstantInt>(M->getLength ());
1145- if (!MDepLen || !MLen || MDepLen->getZExtValue () < MLen->getZExtValue ())
1157+ if (!MDepLen || !MLen ||
1158+ MDepLen->getZExtValue () < MLen->getZExtValue () + MForwardOffset)
11461159 return false ;
11471160 }
11481161
1162+ IRBuilder<> Builder (M);
1163+ auto *CopySource = MDep->getRawSource ();
1164+ auto CleanupOnFailure = llvm::make_scope_exit ([&CopySource] {
1165+ if (CopySource->use_empty ())
1166+ cast<Instruction>(CopySource)->eraseFromParent ();
1167+ });
1168+ MaybeAlign CopySourceAlign = MDep->getSourceAlign ();
1169+ // We just need to calculate the actual size of the copy.
1170+ auto MCopyLoc = MemoryLocation::getForSource (MDep).getWithNewSize (
1171+ MemoryLocation::getForSource (M).Size );
1172+
1173+ // We need to update `MCopyLoc` if an offset exists.
1174+ if (MForwardOffset > 0 ) {
1175+ // The copy destination of `M` may be able to serve as the copy source.
1176+ std::optional<int64_t > MDestOffset =
1177+ M->getRawDest ()->getPointerOffsetFrom (MDep->getRawSource (), DL);
1178+ if (MDestOffset && *MDestOffset == MForwardOffset)
1179+ CopySource = M->getRawDest ();
1180+ else
1181+ CopySource = Builder.CreateInBoundsPtrAdd (
1182+ CopySource, ConstantInt::get (Type::getInt64Ty (Builder.getContext ()),
1183+ MForwardOffset));
1184+ MCopyLoc = MCopyLoc.getWithNewPtr (CopySource);
1185+ if (CopySourceAlign)
1186+ CopySourceAlign = commonAlignment (*CopySourceAlign, MForwardOffset);
1187+ }
1188+
11491189 // Verify that the copied-from memory doesn't change in between the two
11501190 // transfers. For example, in:
11511191 // memcpy(a <- b)
@@ -1155,10 +1195,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11551195 //
11561196 // TODO: If the code between M and MDep is transparent to the destination "c",
11571197 // then we could still perform the xform by moving M up to the first memcpy.
1158- // TODO: It would be sufficient to check the MDep source up to the memcpy
1159- // size of M, rather than MDep.
1160- if (writtenBetween (MSSA, BAA, MemoryLocation::getForSource (MDep),
1161- MSSA->getMemoryAccess (MDep), MSSA->getMemoryAccess (M)))
1198+ if (writtenBetween (MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess (MDep),
1199+ MSSA->getMemoryAccess (M)))
11621200 return false ;
11631201
11641202 // No need to create `memcpy(a <- a)`.
@@ -1191,23 +1229,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11911229
11921230 // TODO: Is this worth it if we're creating a less aligned memcpy? For
11931231 // example we could be moving from movaps -> movq on x86.
1194- IRBuilder<> Builder (M);
11951232 Instruction *NewM;
11961233 if (UseMemMove)
1197- NewM = Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (),
1198- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1199- M->getLength (), M->isVolatile ());
1234+ NewM =
1235+ Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1236+ CopySourceAlign, M->getLength (), M->isVolatile ());
12001237 else if (isa<MemCpyInlineInst>(M)) {
12011238 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
12021239 // never allowed since that would allow the latter to be lowered as a call
12031240 // to an external function.
1204- NewM = Builder.CreateMemCpyInline (
1205- M-> getRawDest (), M-> getDestAlign (), MDep-> getRawSource () ,
1206- MDep-> getSourceAlign (), M->getLength (), M->isVolatile ());
1241+ NewM = Builder.CreateMemCpyInline (M-> getRawDest (), M-> getDestAlign (),
1242+ CopySource, CopySourceAlign ,
1243+ M->getLength (), M->isVolatile ());
12071244 } else
1208- NewM = Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (),
1209- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1210- M->getLength (), M->isVolatile ());
1245+ NewM =
1246+ Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1247+ CopySourceAlign, M->getLength (), M->isVolatile ());
12111248 NewM->copyMetadata (*M, LLVMContext::MD_DIAssignID);
12121249
12131250 assert (isa<MemoryDef>(MSSAU->getMemorySSA ()->getMemoryAccess (M)));
0 commit comments