diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index f9dcb472ed1d2..f135c0a291f0c 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5771,6 +5771,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } +// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst +// is the first instruction that will use Addr. So we need to find the first +// user of Addr in current BB. +static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, + Value *SunkAddr) { + if (Addr->hasOneUse()) + return MemoryInst->getIterator(); + + // We already have a SunkAddr in current BB, but we may need to insert cast + // instruction after it. + if (SunkAddr) { + if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr)) + return std::next(AddrInst->getIterator()); + } + + // Find the first user of Addr in current BB. + Instruction *Earliest = MemoryInst; + for (User *U : Addr->users()) { + Instruction *UserInst = dyn_cast<Instruction>(U); + if (UserInst && UserInst->getParent() == MemoryInst->getParent()) { + if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst()) + continue; + if (UserInst->comesBefore(Earliest)) + Earliest = UserInst; + } + } + return Earliest->getIterator(); +} + /// Sink addressing mode computation immediate before MemoryInst if doing so /// can be done without increasing register pressure. The need for the /// register pressure constraint means this can end up being an all or nothing @@ -5895,11 +5924,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return Modified; } - // Insert this computation right after this user. Since our caller is - // scanning from the top of the BB to the bottom, reuse of the expr are - // guaranteed to happen later. - IRBuilder<> Builder(MemoryInst); - // Now that we determined the addressing expression we want to use and know // that we have to sink it into this block. 
Check to see if we have already // done this for some other load/store instr in this block. If so, reuse @@ -5910,6 +5934,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); + + // The current BB may be optimized multiple times, we can't guarantee the + // reuse of Addr happens later, call findInsertPos to find an appropriate + // insert position. + IRBuilder<> Builder(MemoryInst->getParent(), + findInsertPos(Addr, MemoryInst, SunkAddr)); + if (SunkAddr) { LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll new file mode 100644 index 0000000000000..019f311406550 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -p 'require,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +declare void @g(ptr) + +; %load and %load5 use the same address, %load5 is optimized first, %load is +; optimized later and reuse the same address computation instruction. We must +; make sure not to generate use before def error. 
+ +define void @f(ptr %arg) { +; CHECK-LABEL: define void @f( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 +; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]]) +; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8 +; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 +; CHECK-NEXT: ret void +; +bb: + %getelementptr = getelementptr i8, ptr %arg, i64 -64 + %getelementptr1 = getelementptr i8, ptr %arg, i64 -56 + call void @g(ptr %getelementptr) + br label %bb3 + +bb3: + %load = load ptr, ptr %getelementptr, align 8 + %load4 = load i32, ptr %getelementptr1, align 8 + %load5 = load ptr, ptr %getelementptr, align 8 + %add = add i32 1, 0 + %icmp = icmp eq i32 %add, 0 + br i1 %icmp, label %bb7, label %bb7 + +bb7: + ret void +}