Skip to content

Commit 91d5830

Browse files
stefan-iligcbot
authored and committed
Change MergeAllocas to merge based on size
Previously, MergeAllocas merged allocas based on allocation type and array element type. To achieve more merging, allocations are now merged based on their size.
1 parent ec8aee7 commit 91d5830

File tree

8 files changed

+317
-118
lines changed

8 files changed

+317
-118
lines changed

IGC/AdaptorCommon/RayTracing/MergeAllocas.cpp

Lines changed: 192 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -6,18 +6,23 @@ SPDX-License-Identifier: MIT
66
77
============================= end_copyright_notice ===========================*/
88

9-
#include "Compiler/IGCPassSupport.h"
109
#include "MergeAllocas.h"
10+
#include "Compiler/IGCPassSupport.h"
11+
#include "common/igc_regkeys.hpp"
1112
#include "Probe/Assertion.h"
1213
#include "debug/DebugMacros.hpp"
1314

1415
#include "common/LLVMWarningsPush.hpp"
15-
#include <llvm/ADT/SetVector.h>
1616
#include <llvm/ADT/SetOperations.h>
17+
#include <llvm/ADT/SetVector.h>
1718
#include <llvm/ADT/SmallSet.h>
19+
#include <llvm/ADT/SmallVector.h>
1820
#include <llvm/Analysis/LoopInfo.h>
1921
#include <llvm/IR/Constants.h>
22+
#include <llvm/IR/DataLayout.h>
23+
#include <llvm/IR/DerivedTypes.h>
2024
#include <llvm/IR/Dominators.h>
25+
#include <llvm/IR/Function.h>
2126
#include <llvm/IR/IRBuilder.h>
2227
#include <llvm/IR/InstIterator.h>
2328
#include <llvm/IR/Instructions.h>
@@ -32,6 +37,134 @@ IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3237
IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
3338
IGC_INITIALIZE_PASS_END(AllocationBasedLivenessAnalysis, "igc-allocation-based-liveness-analysis", "Analyze the lifetimes of instruction allocated by a specific intrinsic", false, true)
3439

40+
// Get size of bytes allocated for type including padding.
41+
static size_t GetByteSize(Type *T, const DataLayout *DL) {
42+
if (T->isSized())
43+
return static_cast<size_t>(DL->getTypeAllocSize(T));
44+
return 0;
45+
}
46+
47+
// Builds the bookkeeping record used while merging for a single alloca.
//
// alloca - the alloca instruction being described.
// LD     - liveness data computed for that alloca.
// DL     - data layout used to query the size and preferred alignment.
//
// The record starts with no merged children, the whole allocation still
// available (remainingSize == allocationSize) and offset 0.
static AllocaInfo GetAllocaInfo(AllocaInst *alloca,
                                AllocationBasedLivenessAnalysis::LivenessData *LD,
                                const DataLayout *DL) {
  Type *allocatedType = alloca->getAllocatedType();
  size_t allocationSize = GetByteSize(allocatedType, DL);

  // `alloca T, N` reserves N objects of type T, so the element count has to be
  // part of the byte size. Otherwise an array allocation could be merged into
  // a slot that only has room for a single element.
  if (auto *elementCount = dyn_cast<ConstantInt>(alloca->getArraySize())) {
    allocationSize *= static_cast<size_t>(elementCount->getZExtValue());
  }
  // NOTE(review): an alloca with a non-constant array size has no static byte
  // size; it is still described by the size of one element here. Such allocas
  // should probably be excluded from merging by the caller - TODO confirm.

  const size_t alignment =
      static_cast<size_t>(DL->getPrefTypeAlign(allocatedType).value());

  return {{},
          alloca,
          LD,
          alloca->getAddressSpace(),
          allocationSize,
          allocationSize,
          alignment,
          0};
}
61+
62+
// Rounds startOffset up to the next multiple of alignment.
//
// startOffset - first free byte offset.
// alignment   - required alignment in bytes; 0 is treated as "no alignment
//               requirement" so the modulo below can never divide by zero.
//
// Returns the smallest offset >= startOffset that is a multiple of alignment.
static size_t GetStartingOffset(size_t startOffset, size_t alignment) {
  if (alignment == 0) {
    return startOffset;
  }
  const size_t remainder = startOffset % alignment;
  if (remainder == 0) {
    return startOffset;
  }
  return startOffset + (alignment - remainder);
}
69+
70+
// Tries to place NewAlloca inside the storage described by MergableAlloca.
//
// NewAlloca is rejected when it lives in a different address space, is larger
// than MergableAlloca, or has a lifetime overlapping MergableAlloca's.
// Otherwise it is first offered (recursively) to every alloca already merged
// into MergableAlloca, and only if none of them takes it is it placed in the
// still unused tail of MergableAlloca.
//
// On success NewAlloca->offset holds the byte offset from the start of the
// top-level alloca and the function returns true; on failure nothing is
// modified and the function returns false.
static bool AddNonOverlappingAlloca(AllocaInfo *MergableAlloca,
                                    AllocaInfo *NewAlloca) {
  if (MergableAlloca->addressSpace != NewAlloca->addressSpace) {
    return false;
  }
  if (MergableAlloca->allocationSize < NewAlloca->allocationSize) {
    return false;
  }
  if (MergableAlloca->livenessData->OverlapsWith(*NewAlloca->livenessData)) {
    return false;
  }

  // Prefer sharing the bytes of an alloca that was merged earlier.
  for (auto *NonOverlappingAlloca : MergableAlloca->nonOverlapingAllocas) {
    if (AddNonOverlappingAlloca(NonOverlappingAlloca, NewAlloca)) {
      return true;
    }
  }

  // Otherwise try the unused tail of this alloca.
  if (MergableAlloca->remainingSize < NewAlloca->allocationSize) {
    return false;
  }

  const size_t usedSize =
      MergableAlloca->allocationSize - MergableAlloca->remainingSize;
  // Align the offset measured from the start of the top-level alloca, not the
  // offset relative to MergableAlloca: MergableAlloca itself may sit at an
  // offset that is not a multiple of NewAlloca's alignment.
  // NOTE(review): this presumes the top-level alloca itself is aligned at
  // least as strictly as NewAlloca - confirm.
  const size_t unalignedOffset = MergableAlloca->offset + usedSize;
  const size_t alignedOffset =
      GetStartingOffset(unalignedOffset, NewAlloca->alignment);
  const size_t sizeWithPadding =
      NewAlloca->allocationSize + (alignedOffset - unalignedOffset);

  // Once the alignment padding is accounted for, the new alloca may not fit.
  if (sizeWithPadding > MergableAlloca->remainingSize) {
    return false;
  }
  if (alignedOffset != 0 &&
      IGC_IS_FLAG_ENABLED(DisableMergingOfMultipleAllocasWithOffset)) {
    return false;
  }

  NewAlloca->offset = alignedOffset;
  MergableAlloca->nonOverlapingAllocas.push_back(NewAlloca);
  MergableAlloca->remainingSize -= sizeWithPadding;
  return true;
}
114+
115+
static void ReplaceAllocas(const AllocaInfo &MergableAlloca, Function &F) {
116+
Instruction *topAlloca = MergableAlloca.alloca;
117+
topAlloca->moveBefore(F.getEntryBlock().getFirstNonPHI());
118+
topAlloca->setName(VALUE_NAME("MergedAlloca"));
119+
120+
IRBuilder<> Builder(topAlloca->getParent());
121+
Instruction *topAllocaBitcast = nullptr;
122+
123+
SmallVector<AllocaInfo *> allocasToReplace;
124+
allocasToReplace.insert(allocasToReplace.end(),
125+
MergableAlloca.nonOverlapingAllocas.begin(),
126+
MergableAlloca.nonOverlapingAllocas.end());
127+
128+
while (!allocasToReplace.empty()) {
129+
auto *subAlloca = allocasToReplace.pop_back_val();
130+
131+
auto *subInst = subAlloca->alloca;
132+
auto *ReplacementValue = topAlloca;
133+
134+
if (topAlloca->getType() != subInst->getType()) {
135+
auto *InsertionPoint =
136+
(topAllocaBitcast != nullptr) ? topAllocaBitcast : topAlloca;
137+
Builder.SetInsertPoint(InsertionPoint->getNextNode());
138+
139+
Value *ValueToCast = nullptr;
140+
// If we have offset from original alloca we need to create GEP
141+
if (subAlloca->offset != 0) {
142+
// We can re-use same bitcast
143+
if (topAllocaBitcast == nullptr) {
144+
topAllocaBitcast = cast<Instruction>(
145+
Builder.CreateBitCast(topAlloca, Builder.getInt8PtrTy()));
146+
}
147+
auto *Offset = Builder.getInt32(subAlloca->offset);
148+
auto *GEP = Builder.CreateGEP(Builder.getInt8Ty(),
149+
topAllocaBitcast, Offset);
150+
ValueToCast = GEP;
151+
} else {
152+
// If no offset is needed we can directly cast to target type
153+
ValueToCast = Builder.CreateBitCast(topAlloca, subInst->getType());
154+
}
155+
auto *CastedValue = llvm::cast<Instruction>(
156+
Builder.CreateBitCast(ValueToCast, subInst->getType()));
157+
ReplacementValue = CastedValue;
158+
}
159+
subInst->replaceAllUsesWith(ReplacementValue);
160+
subInst->eraseFromParent();
161+
162+
allocasToReplace.insert(allocasToReplace.end(),
163+
subAlloca->nonOverlapingAllocas.begin(),
164+
subAlloca->nonOverlapingAllocas.end());
165+
}
166+
}
167+
35168
char AllocationBasedLivenessAnalysis::ID = 0;
36169

37170
void AllocationBasedLivenessAnalysis::getAnalysisUsage(llvm::AnalysisUsage& AU) const
@@ -342,20 +475,20 @@ bool AllocationBasedLivenessAnalysis::LivenessData::OverlapsWith(const LivenessD
342475
return true;
343476

344477
// check lifetime boundaries
345-
for (auto& [LD1, LD2] : { std::make_pair(*this, LD), std::make_pair(LD, *this) })
478+
for (auto& [LD1, LD2] : { std::make_pair(this, &LD), std::make_pair(&LD, this) })
346479
{
347-
for (auto* I : LD1.lifetimeEnds)
480+
for (auto* I : LD1->lifetimeEnds)
348481
{
349482
// what if LD1 is contained in a single block
350-
if (I->getParent() == LD1.lifetimeStart->getParent())
483+
if (I->getParent() == LD1->lifetimeStart->getParent())
351484
{
352485
auto* bb = I->getParent();
353-
bool inflow = LD2.bbIn.contains(bb);
354-
bool outflow = LD2.bbOut.contains(bb);
355-
bool lifetimeStart = LD2.lifetimeStart->getParent() == bb && LD2.lifetimeStart->comesBefore(I);
486+
bool inflow = LD2->bbIn.contains(bb);
487+
bool outflow = LD2->bbOut.contains(bb);
488+
bool lifetimeStart = LD2->lifetimeStart->getParent() == bb && LD2->lifetimeStart->comesBefore(I);
356489

357-
auto* LD1_lifetimeStart = LD1.lifetimeStart; // we have to copy LD1.lifetimeStart to avoid clang complaining about LD1 being captured by the lambda
358-
bool lifetimeEnd = any_of(LD2.lifetimeEnds, [&](auto* lifetimeEnd) {
490+
auto* LD1_lifetimeStart = LD1->lifetimeStart; // we have to copy LD1.lifetimeStart to avoid clang complaining about LD1 being captured by the lambda
491+
bool lifetimeEnd = any_of(LD2->lifetimeEnds, [&](auto* lifetimeEnd) {
359492
return lifetimeEnd->getParent() == bb && LD1_lifetimeStart->comesBefore(lifetimeEnd);
360493
});
361494

@@ -371,14 +504,13 @@ bool AllocationBasedLivenessAnalysis::LivenessData::OverlapsWith(const LivenessD
371504
if (lifetimeEnd && lifetimeStart)
372505
return true;
373506
}
374-
else if (I->getParent() == LD2.lifetimeStart->getParent())
507+
else if (I->getParent() == LD2->lifetimeStart->getParent())
375508
{
376-
if (LD2.lifetimeStart->comesBefore(I))
509+
if (LD2->lifetimeStart->comesBefore(I))
377510
return true;
378511
}
379512
}
380513
}
381-
382514
return false;
383515
}
384516

@@ -407,101 +539,66 @@ void MergeAllocas::getAnalysisUsage(llvm::AnalysisUsage& AU) const
407539
AU.addRequired<AllocationBasedLivenessAnalysis>();
408540
}
409541

410-
bool MergeAllocas::runOnFunction(Function& F)
411-
{
412-
if (skipFunction(F)){
542+
bool MergeAllocas::runOnFunction(Function &F) {
543+
if (skipFunction(F)) {
413544
return false;
414545
}
415546

416547
auto ABLA = getAnalysis<AllocationBasedLivenessAnalysis>().getLivenessInfo();
417-
418-
// we group the allocations by type, then sort them into buckets with nonoverlapping liveranges
419-
// can this be generalized into allocas for types of the same size, not only types?
420-
using BucketT = SmallVector<std::pair<Instruction*, AllocationBasedLivenessAnalysis::LivenessData*>>;
421-
DenseMap<std::tuple<llvm::Type*, uint32_t, uint32_t>, SmallVector<BucketT>> buckets;
422-
423-
for (const auto& A : ABLA)
424-
{
425-
const auto& [currI, currLD] = A;
426-
// at this point we assume all I's are alloca instructions
427-
// later AllocationBasedLivenessAnalysis will be generalized to any instruction that can allocate something (like allocaterayquery)
428-
auto* AI = cast<AllocaInst>(currI);
429-
430-
if (!isa<ConstantInt>(AI->getArraySize()))
431-
continue;
432-
433-
auto* AllocatedType = AI->getAllocatedType();
434-
uint32_t IsArray = AllocatedType->isArrayTy()? 1 : 0;
435-
auto& perTypeBuckets = buckets[std::make_tuple(
436-
(IsArray)? AllocatedType->getArrayElementType() : AllocatedType,
437-
IsArray,
438-
AI->getAddressSpace()
439-
)];
440-
441-
bool found = false;
442-
443-
for (auto& bucket : perTypeBuckets)
444-
{
445-
if (llvm::none_of(bucket, [&](std::pair<Instruction*, AllocationBasedLivenessAnalysis::LivenessData*> b) { return b.second->OverlapsWith(*A.second); }))
446-
{
447-
bucket.push_back(std::make_pair(currI, currLD));
448-
found = true;
548+
const auto *DataLayout = &F.getParent()->getDataLayout();
549+
550+
// We group non-overlapping allocas for replacements.
551+
SmallVector<AllocaInfo *> MergableAllocas;
552+
553+
// First we sort analysis results based on allocation size, from larger to
554+
// smaller.
555+
llvm::sort(ABLA, [&](const auto &a, const auto &b) {
556+
return GetByteSize(cast<AllocaInst>(a.first)->getAllocatedType(),
557+
DataLayout) >
558+
GetByteSize(cast<AllocaInst>(b.first)->getAllocatedType(),
559+
DataLayout);
560+
});
561+
562+
// Reserve space for all alloca infos so we can use pointers to them.
563+
AllAllocasInfos.resize(ABLA.size());
564+
size_t currentIndex = 0;
565+
566+
// We iterate over analysis results collecting non-overlapping allocas.
567+
for (const auto &A : ABLA) {
568+
const auto &[currI, currLD] = A;
569+
AllAllocasInfos[currentIndex] =
570+
GetAllocaInfo(cast<AllocaInst>(currI), currLD, DataLayout);
571+
AllocaInfo &AllocaInfo = AllAllocasInfos[currentIndex];
572+
currentIndex++;
573+
574+
// We check if the current alloca overlaps with any of the previously added.
575+
bool added = false;
576+
for (auto *MergableAlloca : MergableAllocas) {
577+
if (AllocaInfo.livenessData->OverlapsWith(*MergableAlloca->livenessData)) {
578+
continue;
579+
}
580+
added = AddNonOverlappingAlloca(MergableAlloca, &AllocaInfo);
581+
if (added) {
449582
break;
450583
}
451584
}
452-
453-
if (!found)
454-
{
455-
perTypeBuckets.push_back({ std::make_pair(currI, currLD) });
585+
// Alloca overlaps with all of the current ones so it will be added as new
586+
// element.
587+
if (!added && AllocaInfo.allocationSize != 0) {
588+
MergableAllocas.push_back(&AllocaInfo);
456589
}
457590
}
458591

459592
bool changed = false;
460593

461-
for (const auto& [allocaType, perTypeBuckets] : buckets)
462-
{
463-
for (const auto& bucket : perTypeBuckets)
464-
{
465-
if (bucket.size() == 1)
466-
{
467-
continue;
468-
}
469-
470-
bool IsArray = std::get<1>(allocaType);
471-
Instruction* firstAlloca = nullptr;
472-
if (IsArray)
473-
{
474-
firstAlloca = std::max_element(bucket.begin(), bucket.end(), [](const auto& a, const auto& b) {
475-
return cast<AllocaInst>(a.first)->getAllocatedType()->getArrayNumElements() < cast<AllocaInst>(b.first)->getAllocatedType()->getArrayNumElements();
476-
})->first;
477-
}
478-
else
479-
{
480-
firstAlloca = bucket[0].first;
481-
}
482-
firstAlloca->moveBefore(F.getEntryBlock().getFirstNonPHI());
483-
firstAlloca->setName(VALUE_NAME("MergedAlloca"));
484-
for (const auto& [I, _] : bucket)
485-
{
486-
if (firstAlloca == I)
487-
{
488-
continue;
489-
}
490-
auto* ReplacementValue = firstAlloca;
491-
if (firstAlloca->getType() != I->getType())
492-
{
493-
IRBuilder<> Builder(firstAlloca->getParent());
494-
Builder.SetInsertPoint(firstAlloca->getNextNode());
495-
auto *CastedValue = llvm::cast<Instruction>(Builder.CreateBitCast(firstAlloca, I->getType()));
496-
ReplacementValue = CastedValue;
497-
};
498-
I->replaceAllUsesWith(ReplacementValue);
499-
I->eraseFromParent();
500-
}
501-
502-
changed = true;
594+
// Replace alloca usages
595+
for (auto *MergableAlloca : MergableAllocas) {
596+
if (MergableAlloca->nonOverlapingAllocas.empty()) {
597+
continue;
503598
}
599+
changed = true;
600+
ReplaceAllocas(*MergableAlloca, F);
504601
}
505602

506603
return changed;
507-
}
604+
}

0 commit comments

Comments
 (0)