Skip to content

Commit 9b4f6f9

Browse files
committed
[CodeGenPrepare] Make sure the instruction obtained from SunkAddrs is before MemoryInst
The function optimizeBlock may run its optimizations on a block multiple times. In the first iteration of the loop, MemoryInst1 may generate a sunk instruction and store it in SunkAddrs. In the second iteration of the loop, MemoryInst2 may use the same address and can then reuse the sunk instruction stored in SunkAddrs, but MemoryInst2 may come before both MemoryInst1 and the corresponding sunk instruction. To avoid a use-before-def error, we need to move the sunk instruction before MemoryInst2. This fixes issue 138208.
1 parent 55a88cd commit 9b4f6f9

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5913,6 +5913,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
59135913
if (SunkAddr) {
59145914
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
59155915
<< " for " << *MemoryInst << "\n");
5916+
Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr);
5917+
if (AddrInst && MemoryInst->comesBefore(AddrInst))
5918+
AddrInst->moveBefore(MemoryInst->getIterator());
59165919
if (SunkAddr->getType() != Addr->getType()) {
59175920
if (SunkAddr->getType()->getPointerAddressSpace() !=
59185921
Addr->getType()->getPointerAddressSpace() &&
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-grtev4-linux-gnu"
6+
7+
declare void @g(ptr)
8+
9+
; %load and %load5 use the same address. %load5 is optimized first; %load is
10+
; optimized later and reuses the same address computation instruction. We must
11+
; make sure not to generate a use-before-def error.
12+
13+
define void @f(ptr %arg) {
14+
; CHECK-LABEL: define void @f(
15+
; CHECK-SAME: ptr [[ARG:%.*]]) {
16+
; CHECK-NEXT: [[BB:.*:]]
17+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
18+
; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
19+
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
20+
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
21+
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
22+
; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
23+
; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
24+
; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
25+
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
26+
; CHECK-NEXT: ret void
27+
;
28+
bb:
29+
%getelementptr = getelementptr i8, ptr %arg, i64 -64
30+
%getelementptr1 = getelementptr i8, ptr %arg, i64 -56
31+
call void @g(ptr %getelementptr)
32+
br label %bb3
33+
34+
bb3:
35+
%load = load ptr, ptr %getelementptr, align 8
36+
%load4 = load i32, ptr %getelementptr1, align 8
37+
%load5 = load ptr, ptr %getelementptr, align 8
38+
%add = add i32 1, 0
39+
%icmp = icmp eq i32 %add, 0
40+
br i1 %icmp, label %bb7, label %bb7
41+
42+
bb7:
43+
ret void
44+
}

0 commit comments

Comments
 (0)