diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 496d2958fc2d0..d1369ae918959 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H
 #define LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H
 
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/PassManager.h"
 
@@ -64,21 +65,28 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
 private:
   // Helper functions
   bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+  bool processLoad(LoadInst *LI, BasicBlock::iterator &BBI,
+                   SmallVectorImpl<Instruction *> &NewInsts);
   bool processStoreOfLoad(StoreInst *SI, LoadInst *LI, const DataLayout &DL,
                           BasicBlock::iterator &BBI);
   bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
-  bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
+  bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI,
+                     SmallVectorImpl<Instruction *> &NewInsts);
   bool processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI);
   bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
                             Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
                             Align cpyAlign, BatchAAResults &BAA,
                             std::function<CallInst *()> GetC);
   bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
-                                     BatchAAResults &BAA);
+                                     BatchAAResults &BAA,
+                                     SmallVectorImpl<Instruction *> &NewInsts);
   bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet,
                                      BatchAAResults &BAA);
   bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet,
                                   BatchAAResults &BAA);
+  bool findNewSrc(MemCpyInst *MDep, Instruction *UseInstr, BatchAAResults &BAA,
+                  Value *&NewSrc, MaybeAlign &NewAlign,
+                  SmallVectorImpl<Instruction *> &NewInsts);
   bool processByValArgument(CallBase &CB, unsigned ArgNo);
   bool processImmutArgument(CallBase &CB, unsigned ArgNo);
   Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
@@ -90,7 +98,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
   bool isMemMoveMemSetDependency(MemMoveInst *M);
 
   void eraseInstruction(Instruction *I);
-  bool iterateOnFunction(Function &F);
+  bool iterateOnFunction(Function &F, SmallVectorImpl<Instruction *> &NewInsts);
 };
 
 } // end namespace llvm
 
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6407f48dc2c05..1dfa6bc787278 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -74,6 +74,7 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
 STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
+STATISTIC(NumLoadInstr, "Number of load instruction optimizations performed");
 
 namespace {
 
@@ -739,6 +740,145 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
   return false;
 }
 
+bool MemCpyOptPass::findNewSrc(MemCpyInst *MDep, Instruction *UseInstr,
+                               BatchAAResults &BAA, Value *&NewSrc,
+                               MaybeAlign &NewAlign,
+                               SmallVectorImpl<Instruction *> &NewInsts) {
+  auto *MemCpy = dyn_cast<MemCpyInst>(UseInstr);
+  auto *LoadI = dyn_cast<LoadInst>(UseInstr);
+  MemoryLocation UseLoc;
+  Value *OldSrc;
+  if (MemCpy) {
+    UseLoc = MemoryLocation::getForSource(MemCpy);
+    OldSrc = MemCpy->getSource();
+  } else if (LoadI) {
+    UseLoc = MemoryLocation::get(LoadI);
+    OldSrc = LoadI->getPointerOperand();
+  } else
+    return false;
+  uint64_t UseLen = 0;
+  if (UseLoc.Size.hasValue())
+    UseLen = UseLoc.Size.getValue().getKnownMinValue();
+  // If dep instruction is reading from our current input, then it is a noop
+  // transfer and substituting the input won't change this instruction. Just
+  // ignore the input and let someone else zap MDep. This handles cases like:
+  // memcpy(a <- a)
+  // memcpy(b <- a)
+  if (OldSrc == MDep->getSource())
+    return false;
+
+  // We can only optimize non-volatile memcpy's.
+  if (MDep->isVolatile())
+    return false;
+
+  int64_t MForwardOffset = 0;
+  const DataLayout &DL = MDep->getDataLayout();
+  // We can only transform memcpy's where the dest of one is the source of the
+  // other, or they have an offset in a range.
+  if (OldSrc != MDep->getDest()) {
+    std::optional<int64_t> Offset =
+        OldSrc->getPointerOffsetFrom(MDep->getDest(), DL);
+    if (!Offset || *Offset < 0)
+      return false;
+    MForwardOffset = *Offset;
+  }
+
+  // The length of the memcpy's must be the same, or the preceding one
+  // must be larger than the following one.
+  if (MForwardOffset != 0 || LoadI ||
+      (MemCpy && MDep->getLength() != MemCpy->getLength())) {
+    auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+    if (UseLen == 0 || !MDepLen ||
+        MDepLen->getZExtValue() < UseLen + MForwardOffset)
+      return false;
+  }
+  IRBuilder<> Builder(UseInstr);
+  NewSrc = MDep->getSource();
+  NewAlign = MDep->getSourceAlign();
+  // We just need to calculate the actual size of the copy.
+  auto MCopyLoc =
+      MemoryLocation::getForSource(MDep).getWithNewSize(UseLoc.Size);
+
+  // When the forwarding offset is greater than 0, we transform
+  // memcpy(d1 <- s1)
+  // memcpy(d2 <- d1+o)
+  // to
+  // memcpy(d2 <- s1+o)
+  if (MForwardOffset > 0) {
+    // The copy destination of `M` may be able to serve as the source of the copy.
+    if (MemCpy && (MForwardOffset == MemCpy->getRawDest()->getPointerOffsetFrom(
+                                         MDep->getRawSource(), DL))) {
+      NewSrc = cast<MemCpyInst>(UseInstr)->getDest();
+    } else {
+      NewSrc = Builder.CreateInBoundsPtrAdd(NewSrc,
+                                            Builder.getInt64(MForwardOffset));
+      if (Instruction *NewI = dyn_cast<Instruction>(NewSrc))
+        NewInsts.push_back(NewI);
+    }
+    // We need to update `MCopyLoc` if an offset exists.
+    MCopyLoc = MCopyLoc.getWithNewPtr(NewSrc);
+    if (NewAlign)
+      NewAlign = commonAlignment(*NewAlign, MForwardOffset);
+  }
+
+  // Avoid infinite loops
+  if (BAA.isMustAlias(OldSrc, NewSrc))
+    return false;
+  // Verify that the copied-from memory doesn't change in between the two
+  // transfers. For example, in:
+  // memcpy(a <- b)
+  // *b = 42;
+  // memcpy(c <- a)
+  // It would be invalid to transform the second memcpy into memcpy(c <- b).
+  //
+  // TODO: If the code between M and MDep is transparent to the destination "c",
+  // then we could still perform the xform by moving M up to the first memcpy.
+  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
+                     MSSA->getMemoryAccess(UseInstr)))
+    return false;
+  return true;
+}
+
+/// Perform simplification of loads. If we have a memcpy A which copies X to Y,
+/// and a load instruction B which loads from Y, then we can rewrite B to load
+/// from X instead. This allows later passes to remove memcpy A or identify the
+/// source of the load.
+bool MemCpyOptPass::processLoad(LoadInst *LI, BasicBlock::iterator &BBI,
+                                SmallVectorImpl<Instruction *> &NewInsts) {
+  if (!LI->isSimple())
+    return false;
+  MemoryUseOrDef *MA = MSSA->getMemoryAccess(LI);
+  if (!MA)
+    return false;
+  BatchAAResults BAA(*AA, EEA);
+
+  MemoryAccess *AnyClobber = MA->getDefiningAccess();
+  const MemoryAccess *DestClobber =
+      MSSA->getWalker()->getClobberingMemoryAccess(
+          AnyClobber, MemoryLocation::get(LI), BAA);
+  MemCpyInst *MDep = nullptr;
+  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+    if (Instruction *MI = MD->getMemoryInst())
+      MDep = dyn_cast<MemCpyInst>(MI);
+
+  if (!MDep)
+    return false;
+
+  Value *NewSrc;
+  MaybeAlign NewAlign;
+  if (!findNewSrc(MDep, LI, BAA, NewSrc, NewAlign, NewInsts))
+    return false;
+  IRBuilder<> Builder(LI);
+  Instruction *NewLI =
+      Builder.CreateAlignedLoad(LI->getType(), NewSrc, NewAlign, LI->getName());
+  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewLI, nullptr, MA);
+  MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
+  LI->replaceAllUsesWith(NewLI);
+  eraseInstruction(LI);
+  ++NumLoadInstr;
+  return true;
+}
+
 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (!SI->isSimple())
     return false;
@@ -1101,101 +1241,18 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
 
 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
-bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
-                                                  MemCpyInst *MDep,
-                                                  BatchAAResults &BAA) {
-  // If dep instruction is reading from our current input, then it is a noop
-  // transfer and substituting the input won't change this instruction. Just
-  // ignore the input and let someone else zap MDep. This handles cases like:
-  // memcpy(a <- a)
-  // memcpy(b <- a)
-  if (M->getSource() == MDep->getSource())
-    return false;
-
-  // We can only optimize non-volatile memcpy's.
-  if (MDep->isVolatile())
+bool MemCpyOptPass::processMemCpyMemCpyDependence(
+    MemCpyInst *M, MemCpyInst *MDep, BatchAAResults &BAA,
+    SmallVectorImpl<Instruction *> &NewInsts) {
+  Value *NewSrc;
+  MaybeAlign NewAlign;
+  if (!findNewSrc(MDep, M, BAA, NewSrc, NewAlign, NewInsts))
     return false;
 
-  int64_t MForwardOffset = 0;
-  const DataLayout &DL = M->getModule()->getDataLayout();
-  // We can only transforms memcpy's where the dest of one is the source of the
-  // other, or they have an offset in a range.
-  if (M->getSource() != MDep->getDest()) {
-    std::optional<int64_t> Offset =
-        M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
-    if (!Offset || *Offset < 0)
-      return false;
-    MForwardOffset = *Offset;
-  }
-
-  // The length of the memcpy's must be the same, or the preceding one
-  // must be larger than the following one.
-  if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
-    auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
-    auto *MLen = dyn_cast<ConstantInt>(M->getLength());
-    if (!MDepLen || !MLen ||
-        MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
-      return false;
-  }
-
-  IRBuilder<> Builder(M);
-  auto *CopySource = MDep->getSource();
-  Instruction *NewCopySource = nullptr;
-  auto CleanupOnRet = llvm::make_scope_exit([&] {
-    if (NewCopySource && NewCopySource->use_empty())
-      // Safety: It's safe here because we will only allocate more instructions
-      // after finishing all BatchAA queries, but we have to be careful if we
-      // want to do something like this in another place. Then we'd probably
-      // have to delay instruction removal until all transforms on an
-      // instruction finished.
-      eraseInstruction(NewCopySource);
-  });
-  MaybeAlign CopySourceAlign = MDep->getSourceAlign();
-  // We just need to calculate the actual size of the copy.
-  auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
-      MemoryLocation::getForSource(M).Size);
-
-  // When the forwarding offset is greater than 0, we transform
-  // memcpy(d1 <- s1)
-  // memcpy(d2 <- d1+o)
-  // to
-  // memcpy(d2 <- s1+o)
-  if (MForwardOffset > 0) {
-    // The copy destination of `M` maybe can serve as the source of copying.
-    std::optional<int64_t> MDestOffset =
-        M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
-    if (MDestOffset == MForwardOffset)
-      CopySource = M->getDest();
-    else {
-      CopySource = Builder.CreateInBoundsPtrAdd(
-          CopySource, Builder.getInt64(MForwardOffset));
-      NewCopySource = dyn_cast<Instruction>(CopySource);
-    }
-    // We need to update `MCopyLoc` if an offset exists.
-    MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
-    if (CopySourceAlign)
-      CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
-  }
-
-  // Avoid infinite loops
-  if (BAA.isMustAlias(M->getSource(), CopySource))
-    return false;
-
-  // Verify that the copied-from memory doesn't change in between the two
-  // transfers. For example, in:
-  // memcpy(a <- b)
-  // *b = 42;
-  // memcpy(c <- a)
-  // It would be invalid to transform the second memcpy into memcpy(c <- b).
-  //
-  // TODO: If the code between M and MDep is transparent to the destination "c",
-  // then we could still perform the xform by moving M up to the first memcpy.
-  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
-                     MSSA->getMemoryAccess(M)))
-    return false;
 
   // No need to create `memcpy(a <- a)`.
-  if (BAA.isMustAlias(M->getDest(), CopySource)) {
+  if (BAA.isMustAlias(M->getDest(), NewSrc)) {
     // Remove the instruction we're replacing.
     eraseInstruction(M);
     ++NumMemCpyInstr;
@@ -1226,20 +1283,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
   // example we could be moving from movaps -> movq on x86.
   Instruction *NewM;
   if (UseMemMove)
-    NewM =
-        Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
-                              CopySourceAlign, M->getLength(), M->isVolatile());
+    NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), NewSrc,
+                                 NewAlign, M->getLength(), M->isVolatile());
   else if (isa<MemCpyInlineInst>(M)) {
     // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
     // never allowed since that would allow the latter to be lowered as a call
    // to an external function.
-    NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
-                                      CopySource, CopySourceAlign,
-                                      M->getLength(), M->isVolatile());
-  } else
     NewM =
-        Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
-                             CopySourceAlign, M->getLength(), M->isVolatile());
+        Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(), NewSrc,
+                                   NewAlign, M->getLength(), M->isVolatile());
+  } else
+    NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), NewSrc,
+                                NewAlign, M->getLength(), M->isVolatile());
 
   NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
 
   assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
@@ -1703,7 +1758,8 @@ static bool isZeroSize(Value *Size) {
 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
 /// circumstances). This allows later passes to remove the first memcpy
 /// altogether.
-bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
+bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI,
+                                  SmallVectorImpl<Instruction *> &NewInsts) {
   // We can only optimize non-volatile memcpy's.
   if (M->isVolatile())
     return false;
 
@@ -1791,7 +1847,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
       }
     }
     if (auto *MDep = dyn_cast<MemCpyInst>(MI))
-      if (processMemCpyMemCpyDependence(M, MDep, BAA))
+      if (processMemCpyMemCpyDependence(M, MDep, BAA, NewInsts))
         return true;
     if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
       if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
@@ -2096,7 +2152,8 @@ bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
 }
 
 /// Executes one iteration of MemCpyOptPass.
-bool MemCpyOptPass::iterateOnFunction(Function &F) {
+bool MemCpyOptPass::iterateOnFunction(
+    Function &F, SmallVectorImpl<Instruction *> &NewInsts) {
   bool MadeChange = false;
 
   // Walk all instruction in the function.
@@ -2114,12 +2171,14 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
 
       bool RepeatInstruction = false;
 
-      if (auto *SI = dyn_cast<StoreInst>(I))
+      if (auto *LI = dyn_cast<LoadInst>(I))
+        MadeChange |= processLoad(LI, BI, NewInsts);
+      else if (auto *SI = dyn_cast<StoreInst>(I))
         MadeChange |= processStore(SI, BI);
      else if (auto *M = dyn_cast<MemSetInst>(I))
        RepeatInstruction = processMemSet(M, BI);
      else if (auto *M = dyn_cast<MemCpyInst>(I))
-        RepeatInstruction = processMemCpy(M, BI);
+        RepeatInstruction = processMemCpy(M, BI, NewInsts);
      else if (auto *M = dyn_cast<MemMoveInst>(I))
        RepeatInstruction = processMemMove(M, BI);
      else if (auto *CB = dyn_cast<CallBase>(I)) {
@@ -2176,13 +2235,19 @@ bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
   MSSAU = &MSSAU_;
   EarliestEscapeAnalysis EEA_(*DT);
   EEA = &EEA_;
+  SmallVector<Instruction *> NewInsts;
 
   while (true) {
-    if (!iterateOnFunction(F))
+    if (!iterateOnFunction(F, NewInsts))
       break;
     MadeChange = true;
   }
 
+  for (auto *I : NewInsts) {
+    if (I->use_empty())
+      eraseInstruction(I);
+  }
+
   if (VerifyMemorySSA)
     MSSA_->verifyMemorySSA();
 
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-load.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-load.ll
new file mode 100644
index 0000000000000..462e03f22c2f1
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-load.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s
+
+define i24 @forward_load(ptr %src) {
+; CHECK-LABEL: define i24 @forward_load(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 3, i1 false)
+; CHECK-NEXT:    [[VAL1:%.*]] = load i24, ptr [[SRC]], align 4
+; CHECK-NEXT:    ret i24 [[VAL1]]
+;
+  %dest = alloca [3 x i8]
+  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+  %val = load i24, ptr %dest
+  ret i24 %val
+}
+
+define i16 @forward_load_2(ptr %src) {
+; CHECK-LABEL: define i16 @forward_load_2(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 2, i1 false)
+; CHECK-NEXT:    [[VAL1:%.*]] = load i16, ptr [[SRC]], align 2
+; CHECK-NEXT:    ret i16 [[VAL1]]
+;
+  %dest = alloca [3 x i8]
+  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
+  %val = load i16, ptr %dest
+  ret i16 %val
+}
+
+define i32 @forward_load_padding(ptr %src) {
+; CHECK-LABEL: define i32 @forward_load_padding(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca { i8, i32 }, align 8
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 8, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DEST]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4
+; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[TMP1]], align 4
+; CHECK-NEXT:    ret i32 [[VAL1]]
+;
+  %dest = alloca { i8, i32 }
+  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
+  %gep = getelementptr inbounds i8, ptr %dest, i64 4
+  %val = load i32, ptr %gep
+  ret i32 %val
+}
+
+; Negative tests
+
+define i24 @failed_forward_load_write_src(ptr %src) {
+; CHECK-LABEL: define i24 @failed_forward_load_write_src(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 3, i1 false)
+; CHECK-NEXT:    store i1 true, ptr [[SRC]], align 1
+; CHECK-NEXT:    [[VAL:%.*]] = load i24, ptr [[DEST]], align 4
+; CHECK-NEXT:    ret i24 [[VAL]]
+;
+  %dest = alloca [3 x i8]
+  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+  store i1 true, ptr %src
+  %val = load i24, ptr %dest
+  ret i24 %val
+}
+
+define i16 @failed_forward_load_size(ptr %src) {
+; CHECK-LABEL: define i16 @failed_forward_load_size(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 1, i1 false)
+; CHECK-NEXT:    [[VAL:%.*]] = load i16, ptr [[DEST]], align 2
+; CHECK-NEXT:    ret i16 [[VAL]]
+;
+  %dest = alloca [3 x i8]
+  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
+  %val = load i16, ptr %dest
+  ret i16 %val
+}
+
+define i32 @failed_forward_load_padding(ptr %src) {
+; CHECK-LABEL: define i32 @failed_forward_load_padding(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca { i8, i32 }, align 8
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 5, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DEST]], i64 4
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+  %dest = alloca { i8, i32 }
+  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 5, i1 false)
+  %gep = getelementptr inbounds i8, ptr %dest, i64 4
+  %val = load i32, ptr %gep
+  ret i32 %val
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 89d8eb1ee6711..066325086b7f0 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -229,10 +229,8 @@ define void @test4_write_between(ptr %P) {
 
 define i8 @test4_read_between(ptr %P) {
 ; CHECK-LABEL: @test4_read_between(
-; CHECK-NEXT:    [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A1]], ptr align 4 [[P:%.*]], i64 8, i1 false)
-; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[A1]], align 1
-; CHECK-NEXT:    call void @test4a(ptr byval(i8) align 1 [[P]])
+; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[A1:%.*]], align 4
+; CHECK-NEXT:    call void @test4a(ptr byval(i8) align 1 [[A1]])
 ; CHECK-NEXT:    ret i8 [[X]]
 ;
   %a1 = alloca %1
diff --git a/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
index 5e13432746bf7..51689cc6fd452 100644
--- a/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
+++ b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
@@ -20,7 +20,7 @@ define i32 @foo(i1 %z) {
 ; CHECK-NEXT:    br label [[FOR_INC7_1]]
 ; CHECK:       for.inc7.1:
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[SCEVGEP]], i64 4, i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SCEVGEP]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll b/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll
new file mode 100644
index 0000000000000..006f15a31c4e1
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+; FIXME: This can be simplified to return true.
+define i1 @main(ptr %i2) {
+; CHECK-LABEL: define noundef i1 @main(
+; CHECK-SAME: ptr captures(none) initializes((0, 3)) [[I2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[I1:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT:    store i8 0, ptr [[I2]], align 1
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 1
+; CHECK-NEXT:    store i8 1, ptr [[I3]], align 1
+; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 2
+; CHECK-NEXT:    store i8 2, ptr [[I4]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 3, ptr nonnull [[I1]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[I1]], ptr noundef nonnull align 1 dereferenceable(3) [[I2]], i64 3, i1 false)
+; CHECK-NEXT:    [[I51:%.*]] = load i8, ptr [[I2]], align 1
+; CHECK-NEXT:    [[I6:%.*]] = icmp eq i8 [[I51]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 1
+; CHECK-NEXT:    [[I82:%.*]] = load i8, ptr [[TMP1]], align 1
+; CHECK-NEXT:    [[I9:%.*]] = icmp eq i8 [[I82]], 1
+; CHECK-NEXT:    [[I10:%.*]] = select i1 [[I6]], i1 [[I9]], i1 false
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 2
+; CHECK-NEXT:    [[I123:%.*]] = load i8, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[I13:%.*]] = icmp eq i8 [[I123]], 2
+; CHECK-NEXT:    [[I14:%.*]] = select i1 [[I10]], i1 [[I13]], i1 false
+; CHECK-NEXT:    br i1 [[I14]], label %[[TRUE:.*]], label %[[FALSE:.*]]
+; CHECK:       [[COMMON_RET:.*]]:
+; CHECK-NEXT:    ret i1 [[I14]]
+; CHECK:       [[TRUE]]:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 3, ptr nonnull [[I1]])
+; CHECK-NEXT:    br label %[[COMMON_RET]]
+; CHECK:       [[FALSE]]:
+; CHECK-NEXT:    call void @assert_failed(ptr nonnull [[I1]])
+; CHECK-NEXT:    br label %[[COMMON_RET]]
+;
+  %i1 = alloca [3 x i8], align 1
+  store i8 0, ptr %i2, align 1
+  %i3 = getelementptr inbounds nuw i8, ptr %i2, i64 1
+  store i8 1, ptr %i3, align 1
+  %i4 = getelementptr inbounds nuw i8, ptr %i2, i64 2
+  store i8 2, ptr %i4, align 1
+  call void @llvm.lifetime.start.p0(i64 3, ptr nonnull %i1)
+  call void @llvm.memcpy.p0.p0.i64(ptr %i1, ptr %i2, i64 3, i1 false)
+  %i5 = load i8, ptr %i1, align 1
+  %i6 = icmp eq i8 %i5, 0
+  %i7 = getelementptr inbounds nuw i8, ptr %i1, i64 1
+  %i8 = load i8, ptr %i7, align 1
+  %i9 = icmp eq i8 %i8, 1
+  %i10 = select i1 %i6, i1 %i9, i1 false
+  %i11 = getelementptr inbounds nuw i8, ptr %i1, i64 2
+  %i12 = load i8, ptr %i11, align 1
+  %i13 = icmp eq i8 %i12, 2
+  %i14 = select i1 %i10, i1 %i13, i1 false
+  br i1 %i14, label %true, label %false
+
+true:
+  call void @llvm.lifetime.end.p0(i64 3, ptr nonnull %i1)
+  ret i1 true
+
+false:
+  call void @assert_failed(ptr %i1)
+  ret i1 false
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @llvm.lifetime.start.p0(i64, ptr)
+declare void @llvm.lifetime.end.p0(i64, ptr)
+declare void @assert_failed(ptr)
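
For reference, the rewrite performed by the new processLoad/findNewSrc path can be summarized with a minimal IR sketch. It mirrors the forward_load test above; the value names are illustrative rather than taken from the pass output:

  ; before memcpyopt
  %dest = alloca [3 x i8]
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  %val = load i24, ptr %dest

  ; after memcpyopt: the load now reads directly from the memcpy source,
  ; leaving the memcpy (and often the alloca) trivially dead for later passes
  %dest = alloca [3 x i8]
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  %val1 = load i24, ptr %src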