Skip to content

Commit e164c31

Browse files
jaladreipsigcbot
authored andcommitted
Implement MergeAllocas pass
Implement MergeAllocas pass
1 parent 086a1d5 commit e164c31

File tree

13 files changed

+595
-16
lines changed

13 files changed

+595
-16
lines changed
Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#include "Compiler/IGCPassSupport.h"
10+
#include "MergeAllocas.h"
11+
#include "Probe/Assertion.h"
12+
#include "debug/DebugMacros.hpp"
13+
14+
#include "common/LLVMWarningsPush.hpp"
15+
#include <llvm/ADT/SetVector.h>
16+
#include <llvm/ADT/SetOperations.h>
17+
#include <llvm/IR/Dominators.h>
18+
#include <llvm/IR/InstIterator.h>
19+
#include <llvm/IR/Instructions.h>
20+
#include <llvm/IR/Constants.h>
21+
#include "common/LLVMWarningsPop.hpp"
22+
23+
using namespace llvm;
24+
using namespace IGC;
25+
26+
// Register pass to igc-opt
27+
IGC_INITIALIZE_PASS_BEGIN(AllocationBasedLivenessAnalysis, "igc-allocation-based-liveness-analysis", "Analyze the lifetimes of instruction allocated by a specific intrinsic", false, true)
28+
IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
29+
IGC_INITIALIZE_PASS_END(AllocationBasedLivenessAnalysis, "igc-allocation-based-liveness-analysis", "Analyze the lifetimes of instruction allocated by a specific intrinsic", false, true)
30+
31+
char AllocationBasedLivenessAnalysis::ID = 0;
32+
33+
// Declares the analysis requirements of this pass.
// The dominator tree is required (ProcessInstruction queries it), and since
// runOnFunction never modifies the IR, every other analysis is preserved.
void AllocationBasedLivenessAnalysis::getAnalysisUsage(llvm::AnalysisUsage& AU) const
{
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesAll();
}
38+
39+
// Constructs the analysis and registers it with the global LLVM pass registry.
AllocationBasedLivenessAnalysis::AllocationBasedLivenessAnalysis() : FunctionPass(ID)
{
    llvm::PassRegistry& registry = *llvm::PassRegistry::getPassRegistry();
    initializeAllocationBasedLivenessAnalysisPass(registry);
}
43+
44+
// Computes liveness data for every alloca in F and stores the results in
// m_LivenessInfo as (allocation, liveness) pairs.
// Always returns false because the function is only inspected, never changed.
bool AllocationBasedLivenessAnalysis::runOnFunction(llvm::Function& F)
{
    // first pass: remember every allocation instruction of the function
    SmallVector<Instruction*> allocations;
    for (auto& inst : instructions(F))
    {
        if (!isa<AllocaInst>(&inst))
            continue;

        allocations.push_back(&inst);
    }

    // reset whatever is currently stored before recording the new results
    clearLivenessInfo();

    // second pass: analyze each allocation on its own
    for (Instruction* allocation : allocations)
    {
        auto* liveness = ProcessInstruction(allocation);
        m_LivenessInfo.push_back(std::make_pair(allocation, liveness));
    }

    return false;
}
64+
65+
// Computes the liveness data of a single allocation instruction.
//
// Starting at the direct uses of I, the def-use chains are followed through
// instructions that only forward a pointer into the same memory (GEP, bitcast,
// addrspacecast, phi, select). Every instruction reached this way is recorded
// as a user of the allocation.
//
// When the pointer may escape (ptrtoint, being stored as a value, being passed
// to a call without nocapture) the end of the lifetime cannot be determined.
// In that case all return instructions of the function are added as users so
// that the lifetime extends to the end of the function.
//
// Returns a LivenessData object allocated with new; ownership passes to the caller.
AllocationBasedLivenessAnalysis::LivenessData* AllocationBasedLivenessAnalysis::ProcessInstruction(Instruction* I)
{
    // static allocas are usually placed in the entry block, but that is only a convention;
    // what matters here is the nearest block that dominates all direct uses
    BasicBlock* commonDominator = nullptr;
    auto* DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

    bool hasNoLifetimeEnd = false;

    SetVector<Instruction*> allUsers;
    SmallVector<Use*> worklist;

    for (auto& use : I->uses())
    {
        auto* UasI = cast<Instruction>(use.getUser());
        commonDominator = commonDominator
            ? DT->findNearestCommonDominator(commonDominator, UasI->getParent())
            : UasI->getParent();

        worklist.push_back(&use);
    }

    // figure out the potential accesses to the memory via derived pointers
    while (!worklist.empty() && !hasNoLifetimeEnd)
    {
        auto* use = worklist.pop_back_val();
        auto* II = cast<Instruction>(use->getUser());

        // insert() tells us whether II is seen for the first time. Only the traversal of
        // derived pointers is skipped for an already known user: the escape checks below
        // depend on the individual use, and one instruction may use the pointer in several
        // operands (e.g. a call taking it both as a nocapture and as a capturing argument).
        const bool firstVisit = allUsers.insert(II);

        switch (II->getOpcode())
        {
        case Instruction::GetElementPtr:
        case Instruction::BitCast:
        case Instruction::AddrSpaceCast:
        case Instruction::PHI:
        case Instruction::Select:
            // the result still points into the allocation, so its users are users of the allocation
            if (firstVisit)
            {
                for (auto& derivedUse : II->uses())
                    worklist.push_back(&derivedUse);
            }
            break;
        case Instruction::PtrToInt:
            hasNoLifetimeEnd = true;
            break;
        case Instruction::Store:
            // the pointer escapes only when it is the value being stored, not the address
            // stored to; compare against the value this particular use refers to
            if (cast<StoreInst>(II)->getValueOperand() == use->get())
                hasNoLifetimeEnd = true;
            break;
        case Instruction::Call:
        {
            auto* call = cast<CallInst>(II);

            // Query the attribute through the call itself: this also works for indirect calls
            // (no callee function to inspect) and for variadic arguments. A use that is not a
            // plain argument (callee operand, operand bundle) is conservatively treated as captured.
            if (!call->isArgOperand(use) ||
                !call->paramHasAttr(call->getArgOperandNo(use), llvm::Attribute::NoCapture))
            {
                hasNoLifetimeEnd = true;
            }
            break;
        }
        default:
            break;
        }
    }

    // we add the return instructions to the list of users to express the infinite lifetime
    if (hasNoLifetimeEnd)
    {
        for (auto& inst : instructions(*I->getFunction()))
        {
            if (isa<ReturnInst>(&inst))
                allUsers.insert(&inst);
        }
    }

    return new LivenessData(I, allUsers, commonDominator);
}
141+
142+
// Builds the block-level liveness sets and the lifetime boundary instructions
// of one allocation.
//   allocationInstruction - the allocation being described
//   usersOfAllocation     - the instructions treated as users of the allocation
//   userDominatorBlock    - block dominating the users; when null, the block
//                           containing the allocation itself is used instead
// Fills bbIn (blocks the lifetime flows into), bbOut (blocks it flows out of),
// lifetimeStart and lifetimeEnds.
AllocationBasedLivenessAnalysis::LivenessData::LivenessData(Instruction* allocationInstruction, SetVector<Instruction*> usersOfAllocation, BasicBlock* userDominatorBlock)
{
    if (userDominatorBlock == nullptr)
        userDominatorBlock = allocationInstruction->getParent();

    // the lifetime starts in the dominator block, so initially assume it leaves that block
    // (this is reverted below when the whole lifetime turns out to be local to it)
    bbOut.insert(userDominatorBlock);

    // seed the backward walk with the blocks of all users
    SmallVector<BasicBlock*> pending;
    for (Instruction* user : usersOfAllocation)
        pending.push_back(user->getParent());

    // backward data flow: walk predecessors until the dominator block is reached
    while (!pending.empty())
    {
        BasicBlock* block = pending.pop_back_val();

        const bool alreadyVisited = bbIn.contains(block);
        if (alreadyVisited || block == userDominatorBlock)
            continue;

        bbIn.insert(block);

        for (BasicBlock* pred : llvm::predecessors(block))
        {
            bbOut.insert(pred);
            pending.push_back(pred);
        }
    }

    // the lifetime starts at the first user inside the dominator block;
    // when that block holds no user, the loop leaves its last instruction selected
    for (Instruction& inst : *userDominatorBlock)
    {
        lifetimeStart = &inst;
        if (usersOfAllocation.contains(&inst))
            break;
    }

    // records the last user found in `block` (if any) as a lifetime end
    auto recordLastUserIn = [&](BasicBlock* block)
    {
        for (Instruction& inst : llvm::reverse(*block))
        {
            if (usersOfAllocation.contains(&inst))
            {
                lifetimeEnds.push_back(&inst);
                return;
            }
        }
    };

    if (!bbIn.empty())
    {
        // the lifetime ends in the blocks it enters but never leaves
        auto endBlocks = bbIn;
        set_subtract(endBlocks, bbOut);

        for (auto* block : endBlocks)
            recordLastUserIn(block);

        return;
    }

    // bbIn is empty: the entire lifetime is contained within the dominator block
    recordLastUserIn(userDominatorBlock);

    // clear bbOut to indicate that the lifetime does not leave any block
    bbOut.clear();
}
215+
216+
bool AllocationBasedLivenessAnalysis::LivenessData::OverlapsWith(const LivenessData& LD) const
217+
{
218+
auto overlapIn = bbIn;
219+
set_intersect(overlapIn, LD.bbIn);
220+
221+
auto overlapOut = bbOut;
222+
set_intersect(overlapOut, LD.bbOut);
223+
224+
// check if both lifetimes flow out or in the same block, this means overlap
225+
if (!overlapIn.empty() || !overlapOut.empty())
226+
return true;
227+
228+
// check lifetime boundaries
229+
for (auto& [LD1, LD2] : { std::make_pair(*this, LD), std::make_pair(LD, *this) })
230+
{
231+
if (LD1.lifetimeEnds.size() == 1 && *LD1.lifetimeEnds.begin() == LD1.lifetimeStart)
232+
continue;
233+
234+
for (auto* I : LD1.lifetimeEnds)
235+
{
236+
if (I->getParent() == LD2.lifetimeStart->getParent())
237+
{
238+
if (LD2.lifetimeStart->comesBefore(I))
239+
return true;
240+
}
241+
}
242+
}
243+
244+
return false;
245+
}
246+
247+
// Register pass to igc-opt
248+
IGC_INITIALIZE_PASS_BEGIN(MergeAllocas, "igc-merge-allocas", "Try to reuse allocas with nonoverlapping lifetimes", false, false)
249+
IGC_INITIALIZE_PASS_DEPENDENCY(AllocationBasedLivenessAnalysis)
250+
IGC_INITIALIZE_PASS_END(MergeAllocas, "igc-merge-allocas", "Try to reuse allocas with nonoverlapping lifetimes", false, false)
251+
252+
char MergeAllocas::ID = 0;
253+
254+
namespace IGC
255+
{
256+
Pass* createMergeAllocas()
257+
{
258+
return new MergeAllocas();
259+
}
260+
}
261+
262+
// Constructs the pass and registers it with the global LLVM pass registry.
MergeAllocas::MergeAllocas() : FunctionPass(ID)
{
    llvm::PassRegistry& registry = *llvm::PassRegistry::getPassRegistry();
    initializeMergeAllocasPass(registry);
}
266+
267+
// Declares the analysis requirements of this pass: the per-allocation liveness
// information has to be available before runOnFunction is invoked.
void MergeAllocas::getAnalysisUsage(llvm::AnalysisUsage& AU) const
{
    using LivenessAnalysis = AllocationBasedLivenessAnalysis;
    AU.addRequired<LivenessAnalysis>();
}
271+
272+
// Merges allocas of F whose lifetimes do not overlap.
//
// Allocas are grouped by (allocated type, constant array size, address space).
// Inside each group they are greedily distributed into buckets so that no two
// members of a bucket have overlapping lifetimes. Every bucket with more than
// one member is then collapsed into its first alloca, which is moved to the
// entry block; the remaining members are replaced by it and erased.
//
// Returns true when at least one alloca was merged.
bool MergeAllocas::runOnFunction(Function& F)
{
    // bind by reference: the liveness results are only read here, there is no need to copy them
    const auto& livenessInfo = getAnalysis<AllocationBasedLivenessAnalysis>().getLivenessInfo();

    // we group the allocations by type, then sort them into buckets with nonoverlapping liveranges
    // can this be generalized into allocas for types of the same size, not only types?
    using AllocationT = std::pair<Instruction*, AllocationBasedLivenessAnalysis::LivenessData*>;
    using BucketT = SmallVector<AllocationT>;
    DenseMap<std::tuple<llvm::Type*, uint64_t, uint32_t>, SmallVector<BucketT>> buckets;

    for (const auto& entry : livenessInfo)
    {
        auto* currI = entry.first;
        auto* currLD = entry.second;

        // at this point we assume all I's are alloca instructions
        // later AllocationBasedLivenessAnalysis will be generalized to any instruction that can allocate something (like allocaterayquery)
        auto* AI = cast<AllocaInst>(currI);

        // only allocas with a compile-time constant element count can be compared by size
        auto* arraySize = dyn_cast<ConstantInt>(AI->getArraySize());
        if (!arraySize)
            continue;

        auto& perTypeBuckets = buckets[std::make_tuple(
            AI->getAllocatedType(),
            arraySize->getZExtValue(),
            AI->getAddressSpace()
        )];

        // place the alloca into the first bucket in which it overlaps with nobody
        bool found = false;

        for (auto& bucket : perTypeBuckets)
        {
            if (llvm::none_of(bucket, [&](const AllocationT& member) { return member.second->OverlapsWith(*currLD); }))
            {
                bucket.push_back(std::make_pair(currI, currLD));
                found = true;
                break;
            }
        }

        if (!found)
        {
            perTypeBuckets.push_back({ std::make_pair(currI, currLD) });
        }
    }

    bool changed = false;

    for (const auto& typeAndBuckets : buckets)
    {
        for (const auto& bucket : typeAndBuckets.second)
        {
            // nothing to merge
            if (bucket.size() < 2)
            {
                continue;
            }

            auto* mergedAlloca = cast<AllocaInst>(bucket.front().first);

            // Users of any member may rely on the alignment of the alloca they were written
            // against, so the surviving alloca has to provide the strictest alignment of the bucket.
            Align mergedAlign = mergedAlloca->getAlign();
            for (const auto& member : bucket)
            {
                const Align memberAlign = cast<AllocaInst>(member.first)->getAlign();
                if (mergedAlign < memberAlign)
                    mergedAlign = memberAlign;
            }

            // the surviving alloca has to dominate the users of all members, so it goes to the entry block
            mergedAlloca->moveBefore(F.getEntryBlock().getFirstNonPHI());
            mergedAlloca->setName(VALUE_NAME("MergedAlloca"));
            mergedAlloca->setAlignment(mergedAlign);

            for (const auto& member : bucket)
            {
                Instruction* redundant = member.first;
                if (redundant == mergedAlloca)
                    continue;

                redundant->replaceAllUsesWith(mergedAlloca);
                redundant->eraseFromParent();
            }

            changed = true;
        }
    }

    return changed;
}

0 commit comments

Comments
 (0)