Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5913,6 +5913,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr);
if (AddrInst && MemoryInst->comesBefore(AddrInst))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit brute-force. Is there another way to get the implied correct insert point, e.g. by changing the insertion point above instead of using MemoryInst?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a function to find the appropriate insert position for a sunk address instruction.

AddrInst->moveBefore(MemoryInst->getIterator());
if (SunkAddr->getType() != Addr->getType()) {
if (SunkAddr->getType()->getPointerAddressSpace() !=
Addr->getType()->getPointerAddressSpace() &&
Expand Down
44 changes: 44 additions & 0 deletions llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"

declare void @g(ptr)

; %load and %load5 use the same address. %load5 is optimized first; %load is
; optimized later and reuses the same address computation instruction. We must
; make sure not to generate a use-before-def error.

define void @f(ptr %arg) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr [[ARG:%.*]]) {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
; CHECK-NEXT: ret void
;
bb:
; Both addresses are computed here, in a different block from the loads that
; use them. %getelementptr is also passed to @g in this block.
%getelementptr = getelementptr i8, ptr %arg, i64 -64
%getelementptr1 = getelementptr i8, ptr %arg, i64 -56
call void @g(ptr %getelementptr)
br label %bb3

bb3:
; %load and %load5 read through the same pointer (%arg - 64), with %load4
; (%arg - 56) between them. The checks above expect the single sunk address
; SUNKADDR1 to be defined before %load, its first user.
%load = load ptr, ptr %getelementptr, align 8
%load4 = load i32, ptr %getelementptr1, align 8
%load5 = load ptr, ptr %getelementptr, align 8
%add = add i32 1, 0
%icmp = icmp eq i32 %add, 0
; Both successors of this branch are %bb7.
br i1 %icmp, label %bb7, label %bb7

bb7:
ret void
}
Loading