Skip to content

Commit 8d72e76

Browse files
committed
[CGP] Bail out if (Base|Scaled)Reg does not dominate insert point.
(Base|Scaled)Reg may not dominate the chosen insert point if there are multiple uses of the address. Bail out in that case; otherwise we would generate invalid IR. In some cases, we could probably adjust the insert point or hoist the (Base|Scaled)Reg instead.
1 parent b9d3a64 commit 8d72e76

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5945,8 +5945,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
59455945
// The current BB may be optimized multiple times, we can't guarantee the
59465946
// reuse of Addr happens later, call findInsertPos to find an appropriate
59475947
// insert position.
5948-
IRBuilder<> Builder(MemoryInst->getParent(),
5949-
findInsertPos(Addr, MemoryInst, SunkAddr));
5948+
auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5949+
5950+
// TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5951+
if (!SunkAddr &&
5952+
((AddrMode.BaseReg && !DT->dominates(AddrMode.BaseReg, &*InsertPos)) ||
5953+
(AddrMode.ScaledReg && !DT->dominates(AddrMode.ScaledReg, &*InsertPos))))
5954+
return Modified;
5955+
5956+
IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
59505957

59515958
if (SunkAddr) {
59525959
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s
3+
4+
5+
target triple = "x86_64-unknown-linux"
6+
7+
declare i1 @cond(float)
8+
9+
define void @test(ptr %src) {
10+
; CHECK-LABEL: define void @test(
11+
; CHECK-SAME: ptr [[SRC:%.*]]) {
12+
; CHECK-NEXT: [[BB:.*]]:
13+
; CHECK-NEXT: br label %[[LOOP:.*]]
14+
; CHECK: [[LOOP]]:
15+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
16+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
17+
; CHECK-NEXT: [[SUNKADDR2:%.*]] = mul i64 [[IV_NEXT]], 2
18+
; CHECK-NEXT: [[SUNKADDR3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR2]]
19+
; CHECK-NEXT: [[SUNKADDR4:%.*]] = getelementptr i8, ptr [[SUNKADDR3]], i64 6
20+
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[SUNKADDR4]], align 4
21+
; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 2
22+
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR]]
23+
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[SUNKADDR1]], align 4
24+
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond(float [[L_0]])
25+
; CHECK-NEXT: [[C:%.*]] = call i1 @cond(float [[L_1]])
26+
; CHECK-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT:.*]]
27+
; CHECK: [[EXIT]]:
28+
; CHECK-NEXT: ret void
29+
;
30+
bb:
31+
%gep.base = getelementptr i8, ptr %src, i64 8
32+
br label %loop
33+
34+
loop:
35+
%iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
36+
%iv.shl = shl i64 %iv, 1
37+
%gep.shl = getelementptr i8, ptr %gep.base, i64 %iv.shl
38+
%gep.sub = getelementptr i8, ptr %gep.shl, i64 -8
39+
%iv.next = add i64 %iv, 1
40+
%l.0 = load float, ptr %gep.shl, align 4
41+
%l.1 = load float, ptr %gep.sub, align 4
42+
call i1 @cond(float %l.0)
43+
%c = call i1 @cond(float %l.1)
44+
br i1 %c, label %loop, label %exit
45+
46+
exit:
47+
ret void
48+
}

0 commit comments

Comments
 (0)