Skip to content
Merged
7 changes: 7 additions & 0 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6092,6 +6092,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}

if (!ResultIndex) {
auto PtrInst = dyn_cast<Instruction>(ResultPtr);
// We know that we have a pointer without any offsets. If this pointer
// originates from a different basic block than the current one, we
// must be able to recreate it in the current basic block.
// We do not support the recreation of any instructions yet.
if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we can change the predicate to

if (!PtrInst || PtrInst->getParent() != MemoryInst->getParent())

To skip arguments and global pointers. I'm not sure if it is completely correct.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer this condition. We should not modify a memory access instruction whose pointer comes directly from an argument or a global value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. I also added tests for arguments and globals. @weiguozhi you may be interested that we don't reuse addresses in these cases: https://godbolt.org/z/q8fzce4ov

return Modified;
SunkAddr = ResultPtr;
} else {
if (ResultPtr->getType() != I8PtrTy)
Expand Down
106 changes: 106 additions & 0 deletions llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"

@globalptr = external global ptr
declare ptr @get_ptr(i64)

; Can't recreate invoke instruction
;
; The address used by the memory accesses below is the result of an invoke in
; the entry block, reached through a getelementptr whose indices are all zero
; (i.e. the pointer itself, with no offset). The assertions expect the single
; invoke to stay in the entry block and every access to be addressed through
; its result.

define void @addr_from_invoke() personality ptr null {
; CHECK-LABEL: define void @addr_from_invoke() personality ptr null {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PTR:%.*]] = invoke ptr @get_ptr(i64 0)
; CHECK-NEXT: to label %[[BODY_1:.*]] unwind label %[[EHCLEANUP:.*]]
; CHECK: [[EHCLEANUP]]:
; CHECK-NEXT: [[PAD:%.*]] = cleanuppad within none []
; CHECK-NEXT: cleanupret from [[PAD]] unwind to caller
; CHECK: [[BODY_1]]:
; CHECK-NEXT: [[GEP1:%.*]] = bitcast ptr [[PTR]] to ptr
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[PTR]] to ptr
; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP0]], align 4
; CHECK-NEXT: ret void
;
entry:
%ptr = invoke ptr @get_ptr(i64 0) to label %body.1 unwind label %ehcleanup

; body.2 is listed before body.1 but is only reached from it; it reuses the
; %gep that body.1 computes, so the address is defined in a different block
; than the load and store that use it here.
body.2:
%unused = load <4 x i32>, ptr %gep, align 4
store <4 x i32> zeroinitializer, ptr %gep, align 4
ret void

; Unwind destination of the invoke in the entry block.
ehcleanup:
%pad = cleanuppad within none []
cleanupret from %pad unwind to caller

; Normal destination of the invoke: forms the zero-offset address from %ptr.
body.1:
%gep = getelementptr { i32 }, ptr %ptr, i64 0, i32 0
store <4 x i32> zeroinitializer, ptr %gep, align 4
br label %body.2
}

; Same access pattern as above, but the base pointer is the function argument
; %ptr rather than the result of an instruction. The assertions expect the
; load and the second store to use the argument directly, while the first
; store goes through a bitcast of it.
define void @addr_from_arg(ptr %ptr, i1 %p) {
; CHECK-LABEL: define void @addr_from_arg(
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[P]], label %[[BODY_1:.*]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
; CHECK: [[BODY_1]]:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[PTR]] to ptr
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP0]], align 4
; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[PTR]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
entry:
br i1 %p, label %body.1, label %exit

; body.2 is listed before body.1 but is only reached from it; it reuses the
; %gep that body.1 computes.
body.2:
%unused = load <4 x i32>, ptr %gep, align 4
store <4 x i32> zeroinitializer, ptr %gep, align 4
ret void

exit:
ret void

; The getelementptr has only zero indices, so %gep is %ptr with no offset.
body.1:
%gep = getelementptr { i32 }, ptr %ptr, i64 0, i32 0
store <4 x i32> zeroinitializer, ptr %gep, align 4
br label %body.2
}

; Same access pattern as above, but the base pointer is the global @globalptr
; rather than the result of an instruction. The assertions expect the load and
; the second store to use the global directly, while the first store goes
; through a bitcast of it.
define void @addr_from_global(i1 %p) {
; CHECK-LABEL: define void @addr_from_global(
; CHECK-SAME: i1 [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[P]], label %[[BODY_1:.*]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
; CHECK: [[BODY_1]]:
; CHECK-NEXT: [[GEP1:%.*]] = bitcast ptr @globalptr to ptr
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GEP1]], align 4
; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr @globalptr, align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @globalptr, align 4
; CHECK-NEXT: ret void
;
entry:
br i1 %p, label %body.1, label %exit

; body.2 is listed before body.1 but is only reached from it; it reuses the
; %gep that body.1 computes.
body.2:
%unused = load <4 x i32>, ptr %gep, align 4
store <4 x i32> zeroinitializer, ptr %gep, align 4
ret void

exit:
ret void

; The getelementptr has only zero indices, so %gep is @globalptr with no
; offset.
body.1:
%gep = getelementptr { i32 }, ptr @globalptr, i64 0, i32 0
store <4 x i32> zeroinitializer, ptr %gep, align 4
br label %body.2
}
Loading