Skip to content

Commit e32c224

Browse files
committed
[IPSCCP] Push constant struct params into callees
This patch pushes constant alloca params into single-use callees. This is beneficial in large functions emitted by Fortran "box passing", which can combine multiple constant alloca parameters into one large alloca, resulting in duplicate, unnecessary constant stores and less clear optimization paths. This only works when the callee has a single use, that use is a CallInst with an alloca parameter, and the alloca's other users are only stores and GEPs that are themselves stored to (one layer deep).
1 parent 35684fa commit e32c224

File tree

3 files changed

+224
-4
lines changed

3 files changed

+224
-4
lines changed

llvm/lib/Transforms/IPO/SCCP.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/IR/AttributeMask.h"
2424
#include "llvm/IR/Constants.h"
2525
#include "llvm/IR/DIBuilder.h"
26+
#include "llvm/IR/IRBuilder.h"
2627
#include "llvm/IR/IntrinsicInst.h"
2728
#include "llvm/Support/CommandLine.h"
2829
#include "llvm/Support/ModRef.h"
@@ -265,6 +266,125 @@ static bool runIPSCCP(
265266
}
266267
}
267268

269+
// If a function has one use, has an alloca parameter, and its caller has
270+
// nothing but geps/stores to the alloca, push the alloca definition and all
271+
// stores/geps into the callee. For now, rely on argpromotion to clean up the
272+
// dead arguments left in the caller
273+
for (auto &F : M) {
274+
if (F.hasOneUse() && canTrackArgumentsInterprocedurally(&F)) {
275+
CallInst *CI = dyn_cast<CallInst>(*F.user_begin());
276+
if (!CI)
277+
continue;
278+
for (auto &Arg : CI->args()) {
279+
auto AI = dyn_cast<AllocaInst>(Arg);
280+
if (!AI)
281+
continue;
282+
283+
auto GetAllocaUsers = [&CI](AllocaInst *AI,
284+
SmallVector<Value *> &AllocaUsers) -> bool {
285+
for (User *U : AI->users()) {
286+
if (U == CI)
287+
continue;
288+
289+
auto I = dyn_cast<Instruction>(U);
290+
if (!I)
291+
continue;
292+
switch (I->getOpcode()) {
293+
default: {
294+
return false;
295+
}
296+
case Instruction::Store: {
297+
auto SI = cast<StoreInst>(U);
298+
if (SI->isVolatile() || !isa<Constant>(SI->getValueOperand())) {
299+
return false;
300+
}
301+
AllocaUsers.push_back(SI);
302+
break;
303+
}
304+
case Instruction::GetElementPtr: {
305+
auto GEP = cast<GetElementPtrInst>(U);
306+
auto SI = dyn_cast<StoreInst>(*GEP->users().begin());
307+
if (GEP->getNumUses() != 1 || !SI ||
308+
!isa<Constant>(SI->getValueOperand())) {
309+
return false;
310+
}
311+
AllocaUsers.push_back(GEP);
312+
break;
313+
}
314+
}
315+
}
316+
return !AllocaUsers.empty();
317+
};
318+
319+
SmallVector<Value *> AllocaUsers;
320+
if (!GetAllocaUsers(AI, AllocaUsers))
321+
continue;
322+
323+
// Copy uses of the Alloca to the callee
324+
IRBuilder<> B(&F.getEntryBlock().front());
325+
DataLayout DL = AI->getDataLayout();
326+
AllocaInst *NewAI =
327+
B.CreateAlloca(AI->getAllocatedType(), nullptr, AI->getName());
328+
F.getArg(Arg.getOperandNo())->replaceAllUsesWith(NewAI);
329+
NewAI->setAlignment(AI->getAlign());
330+
331+
for (auto U : AllocaUsers) {
332+
switch (cast<Instruction>(U)->getOpcode()) {
333+
default:
334+
llvm_unreachable("Illegal user type in AllocaUsers");
335+
case Instruction::Store: {
336+
auto SI = cast<StoreInst>(U);
337+
auto NewStore = B.CreateStore(SI->getValueOperand(), NewAI);
338+
NewStore->setAlignment(SI->getAlign());
339+
break;
340+
}
341+
case Instruction::GetElementPtr: {
342+
auto GEP = cast<GetElementPtrInst>(U);
343+
344+
SmallVector<Value *> GepIndices;
345+
for (unsigned i = 0; i < GEP->getNumIndices(); i++)
346+
GepIndices.push_back(GEP->getOperand(i + 1));
347+
348+
GetElementPtrInst *NewGep = cast<GetElementPtrInst>(
349+
B.CreateGEP(GEP->getSourceElementType(), NewAI, GepIndices));
350+
NewGep->setNoWrapFlags(GEP->getNoWrapFlags());
351+
352+
auto SI = cast<StoreInst>(*GEP->users().begin());
353+
auto NewStore = B.CreateStore(SI->getValueOperand(), NewGep);
354+
NewStore->setAlignment(SI->getAlign());
355+
}
356+
}
357+
}
358+
359+
// Remove old uses of the Alloca in the caller
360+
while (!AllocaUsers.empty()) {
361+
Instruction *I = cast<Instruction>(AllocaUsers.pop_back_val());
362+
switch (I->getOpcode()) {
363+
default:
364+
llvm_unreachable("Illegal user type when removing Alloca users");
365+
case Instruction::Store: {
366+
I->removeFromParent();
367+
I->deleteValue();
368+
break;
369+
}
370+
case Instruction::GetElementPtr: {
371+
auto SI = cast<Instruction>(*I->users().begin());
372+
SI->removeFromParent();
373+
SI->deleteValue();
374+
I->removeFromParent();
375+
I->deleteValue();
376+
}
377+
}
378+
}
379+
MadeChanges = true;
380+
381+
// TODO:
382+
// - delete dead params here instead of relying on argpromotion
383+
// - remove empty alloca instruction
384+
}
385+
}
386+
}
387+
268388
// If we inferred constant or undef return values for a function, we replaced
269389
// all call uses with the inferred value. This means we don't need to bother
270390
// actually returning anything from the function. Replace all return

llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@
99

1010
define internal void @f(ptr byval(%struct.ss) align 8 %b, ptr byval(i32) align 4 %X) noinline nounwind {
1111
; CHECK-LABEL: define {{[^@]+}}@f
12-
; CHECK-SAME: (i32 [[B_0:%.*]]){{[^#]*}} #[[ATTR0:[0-9]+]] {
1312
; CHECK-NEXT: entry:
14-
; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
15-
; CHECK-NEXT: store i32 [[TEMP]], ptr [[DUMMY]], align 4
13+
; CHECK-NEXT: store i32 2, ptr [[DUMMY]], align 4
1614
; CHECK-NEXT: ret void
1715
;
1816
entry:
@@ -27,7 +25,7 @@ define i32 @test(ptr %X) {
2725
; CHECK-LABEL: define {{[^@]+}}@test
2826
; CHECK-SAME: (ptr {{[^%]*}} [[X:%.*]]){{[^#]*}} #[[ATTR1:[0-9]+]] {
2927
; CHECK-NEXT: entry:
30-
; CHECK-NEXT: tail call {{.*}}void @f(i32 1)
28+
; CHECK-NEXT: tail call {{.*}}void @f()
3129
; CHECK-NEXT: ret i32 0
3230
;
3331
entry:
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=ipsccp,argpromotion < %s | FileCheck %s
3+
4+
@sudoku0 = internal global [9 x [9 x i32]] zeroinitializer
5+
@sudoku1 = internal global [9 x [9 x i32]] zeroinitializer
6+
7+
declare void @callee2(ptr nocapture nonnull readonly %0)
8+
9+
define internal i64 @callee(ptr nocapture readonly %0, ptr nocapture readonly %1) local_unnamed_addr {
10+
; CHECK-LABEL: define internal i64 @callee() local_unnamed_addr {
11+
; CHECK-NEXT: [[TMP1:%.*]] = alloca { ptr, i64 }, align 8
12+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
13+
; CHECK-NEXT: store i64 4, ptr [[TMP2]], align 8
14+
; CHECK-NEXT: store ptr @sudoku1, ptr [[TMP1]], align 8
15+
; CHECK-NEXT: [[TMP3:%.*]] = alloca { ptr, i64 }, align 8
16+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
17+
; CHECK-NEXT: store i64 4, ptr [[TMP4]], align 8
18+
; CHECK-NEXT: store ptr @sudoku0, ptr [[TMP3]], align 8
19+
; CHECK-NEXT: [[MEGASTRUCT:%.*]] = alloca { ptr, ptr, { ptr, i64 }, { ptr, i64 } }, align 8
20+
; CHECK-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4
21+
; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4
22+
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
23+
; CHECK-NEXT: [[GEP_0_1:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
24+
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[GEP_0_1]], align 8
25+
; CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP3]], align 8
26+
; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
27+
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[GEP_1_1]], align 8
28+
; CHECK-NEXT: store ptr [[ALLOCA0]], ptr [[MEGASTRUCT]], align 8
29+
; CHECK-NEXT: [[MEGASTRUCT_GEP_1:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 8
30+
; CHECK-NEXT: store ptr [[ALLOCA1]], ptr [[MEGASTRUCT_GEP_1]], align 8
31+
; CHECK-NEXT: [[MEGASTRUCT_GEP_2:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 16
32+
; CHECK-NEXT: store ptr [[TMP5]], ptr [[MEGASTRUCT_GEP_2]], align 8
33+
; CHECK-NEXT: [[MEGASTRUCT_GEP_3:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 24
34+
; CHECK-NEXT: store i64 [[TMP6]], ptr [[MEGASTRUCT_GEP_3]], align 8
35+
; CHECK-NEXT: [[MEGASTRUCT_GEP_4:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 32
36+
; CHECK-NEXT: store ptr [[TMP7]], ptr [[MEGASTRUCT_GEP_4]], align 8
37+
; CHECK-NEXT: [[MEGASTRUCT_GEP_5:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 40
38+
; CHECK-NEXT: store i64 [[TMP8]], ptr [[MEGASTRUCT_GEP_5]], align 8
39+
; CHECK-NEXT: call fastcc void @callee2(ptr [[MEGASTRUCT]])
40+
; CHECK-NEXT: ret i64 poison
41+
;
42+
%megastruct = alloca { ptr, ptr, { ptr, i64 }, { ptr, i64 } }
43+
44+
%alloca0 = alloca i32, align 4
45+
%alloca1 = alloca i32, align 4
46+
47+
%3 = load ptr, ptr %0, align 8
48+
%gep.0.1 = getelementptr inbounds i8, ptr %0, i64 8
49+
%4 = load i64, ptr %gep.0.1, align 8
50+
51+
%5 = load ptr, ptr %0, align 8
52+
%gep.1.1 = getelementptr inbounds i8, ptr %1, i64 8
53+
%6 = load i64, ptr %gep.1.1, align 8
54+
55+
store ptr %alloca0, ptr %megastruct, align 8
56+
%megastruct.gep.1 = getelementptr inbounds i8, ptr %megastruct, i64 8
57+
store ptr %alloca1, ptr %megastruct.gep.1, align 8
58+
59+
%megastruct.gep.2 = getelementptr inbounds i8, ptr %megastruct, i64 16
60+
store ptr %3, ptr %megastruct.gep.2, align 8
61+
62+
%megastruct.gep.3 = getelementptr inbounds i8, ptr %megastruct, i64 24
63+
store i64 %4, ptr %megastruct.gep.3, align 8
64+
65+
%megastruct.gep.4 = getelementptr inbounds i8, ptr %megastruct, i64 32
66+
store ptr %5, ptr %megastruct.gep.4, align 8
67+
68+
%megastruct.gep.5 = getelementptr inbounds i8, ptr %megastruct, i64 40
69+
store i64 %6, ptr %megastruct.gep.5, align 8
70+
71+
call fastcc void @callee2(ptr %megastruct)
72+
ret i64 1
73+
}
74+
75+
define i64 @caller() local_unnamed_addr {
76+
; CHECK-LABEL: define i64 @caller() local_unnamed_addr {
77+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee()
78+
; CHECK-NEXT: ret i64 1
79+
;
80+
%1 = alloca { ptr, i64 }, align 8
81+
%2 = alloca { ptr, i64 }, align 8
82+
83+
store ptr @sudoku0, ptr %1, align 8
84+
%.gep0 = getelementptr inbounds i8, ptr %1, i64 8
85+
store i64 4, ptr %.gep0, align 8
86+
87+
store ptr @sudoku1, ptr %2, align 8
88+
%.gep1 = getelementptr inbounds i8, ptr %2, i64 8
89+
store i64 4, ptr %.gep1, align 8
90+
91+
%p = call i64 @callee(ptr nonnull %1, ptr nonnull %2)
92+
ret i64 %p
93+
}
94+
95+
define i64 @m() local_unnamed_addr {
96+
; CHECK-LABEL: define i64 @m() local_unnamed_addr {
97+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @caller()
98+
; CHECK-NEXT: ret i64 1
99+
;
100+
%1 = call i64 @caller()
101+
ret i64 %1
102+
}

0 commit comments

Comments
 (0)