@@ -6,18 +6,23 @@ SPDX-License-Identifier: MIT
66
77============================= end_copyright_notice ===========================*/
88
9- #include " Compiler/IGCPassSupport.h"
109#include " MergeAllocas.h"
10+ #include " Compiler/IGCPassSupport.h"
11+ #include " common/igc_regkeys.hpp"
1112#include " Probe/Assertion.h"
1213#include " debug/DebugMacros.hpp"
1314
1415#include " common/LLVMWarningsPush.hpp"
15- #include < llvm/ADT/SetVector.h>
1616#include < llvm/ADT/SetOperations.h>
17+ #include < llvm/ADT/SetVector.h>
1718#include < llvm/ADT/SmallSet.h>
19+ #include < llvm/ADT/SmallVector.h>
1820#include < llvm/Analysis/LoopInfo.h>
1921#include < llvm/IR/Constants.h>
22+ #include < llvm/IR/DataLayout.h>
23+ #include < llvm/IR/DerivedTypes.h>
2024#include < llvm/IR/Dominators.h>
25+ #include < llvm/IR/Function.h>
2126#include < llvm/IR/IRBuilder.h>
2227#include < llvm/IR/InstIterator.h>
2328#include < llvm/IR/Instructions.h>
@@ -32,6 +37,134 @@ IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3237IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
3338IGC_INITIALIZE_PASS_END(AllocationBasedLivenessAnalysis, " igc-allocation-based-liveness-analysis" , " Analyze the lifetimes of instruction allocated by a specific intrinsic" , false , true )
3439
40+ // Get size of bytes allocated for type including padding.
41+ static size_t GetByteSize(Type *T, const DataLayout *DL) {
42+ if (T->isSized ())
43+ return static_cast <size_t >(DL->getTypeAllocSize (T));
44+ return 0 ;
45+ }
46+
47+ static AllocaInfo GetAllocaInfo (AllocaInst *alloca,
48+ AllocationBasedLivenessAnalysis::LivenessData *LD,
49+ const DataLayout *DL) {
50+ size_t allocationSize = GetByteSize (alloca->getAllocatedType (), DL);
51+ return {{},
52+ alloca,
53+ LD,
54+ alloca->getAddressSpace (),
55+ allocationSize,
56+ allocationSize,
57+ static_cast <size_t >(
58+ DL->getPrefTypeAlign (alloca->getAllocatedType ()).value ()),
59+ 0 };
60+ }
61+
// Rounds startOffset up to the nearest multiple of alignment.
//
// startOffset - first candidate byte offset.
// alignment   - required alignment in bytes.
//
// Returns the smallest offset >= startOffset that is a multiple of alignment.
// An alignment of 0 or 1 imposes no constraint and returns startOffset as is.
static size_t GetStartingOffset(size_t startOffset, size_t alignment) {
  // Guard the modulo below: alignment == 0 would be a division by zero.
  if (alignment <= 1) {
    return startOffset;
  }
  const size_t remainder = startOffset % alignment;
  if (remainder == 0) {
    return startOffset;
  }
  return startOffset + (alignment - remainder);
}
69+
70+ static bool AddNonOverlappingAlloca (AllocaInfo *MergableAlloca,
71+ AllocaInfo *NewAlloca) {
72+ if (MergableAlloca->addressSpace != NewAlloca->addressSpace ) {
73+ return false ;
74+ }
75+ if (MergableAlloca->allocationSize < NewAlloca->allocationSize ) {
76+ return false ;
77+ }
78+ if (MergableAlloca->livenessData ->OverlapsWith (*NewAlloca->livenessData )) {
79+ return false ;
80+ }
81+
82+ // Check if we can merge alloca to one of existing non-overlapping allocas.
83+ for (auto *NonOverlappingAlloca : MergableAlloca->nonOverlapingAllocas ) {
84+ bool added = AddNonOverlappingAlloca (NonOverlappingAlloca, NewAlloca);
85+ if (added) {
86+ return true ;
87+ }
88+ }
89+
90+ // Check if we have still space in existing alloca to add new alloca
91+ if (MergableAlloca->remainingSize >= NewAlloca->allocationSize ) {
92+ size_t currentOffset =
93+ MergableAlloca->allocationSize - MergableAlloca->remainingSize ;
94+ size_t newStartingOffset =
95+ GetStartingOffset (currentOffset, NewAlloca->alignment );
96+ size_t sizeWithPadding =
97+ NewAlloca->allocationSize + (newStartingOffset - currentOffset);
98+ // When adding alignment in consideration we can't fit new alloca.
99+ if (sizeWithPadding > MergableAlloca->remainingSize ) {
100+ return false ;
101+ }
102+ size_t newAllocaOffset = newStartingOffset + MergableAlloca->offset ;
103+ if (newAllocaOffset != 0 && IGC_IS_FLAG_ENABLED (DisableMergingOfMultipleAllocasWithOffset)) {
104+ return false ;
105+ }
106+ NewAlloca->offset = newAllocaOffset;
107+ MergableAlloca->nonOverlapingAllocas .push_back (NewAlloca);
108+ MergableAlloca->remainingSize -= sizeWithPadding;
109+ return true ;
110+ }
111+
112+ return false ;
113+ }
114+
115+ static void ReplaceAllocas (const AllocaInfo &MergableAlloca, Function &F) {
116+ Instruction *topAlloca = MergableAlloca.alloca ;
117+ topAlloca->moveBefore (F.getEntryBlock ().getFirstNonPHI ());
118+ topAlloca->setName (VALUE_NAME (" MergedAlloca" ));
119+
120+ IRBuilder<> Builder (topAlloca->getParent ());
121+ Instruction *topAllocaBitcast = nullptr ;
122+
123+ SmallVector<AllocaInfo *> allocasToReplace;
124+ allocasToReplace.insert (allocasToReplace.end (),
125+ MergableAlloca.nonOverlapingAllocas .begin (),
126+ MergableAlloca.nonOverlapingAllocas .end ());
127+
128+ while (!allocasToReplace.empty ()) {
129+ auto *subAlloca = allocasToReplace.pop_back_val ();
130+
131+ auto *subInst = subAlloca->alloca ;
132+ auto *ReplacementValue = topAlloca;
133+
134+ if (topAlloca->getType () != subInst->getType ()) {
135+ auto *InsertionPoint =
136+ (topAllocaBitcast != nullptr ) ? topAllocaBitcast : topAlloca;
137+ Builder.SetInsertPoint (InsertionPoint->getNextNode ());
138+
139+ Value *ValueToCast = nullptr ;
140+ // If we have offset from original alloca we need to create GEP
141+ if (subAlloca->offset != 0 ) {
142+ // We can re-use same bitcast
143+ if (topAllocaBitcast == nullptr ) {
144+ topAllocaBitcast = cast<Instruction>(
145+ Builder.CreateBitCast (topAlloca, Builder.getInt8PtrTy ()));
146+ }
147+ auto *Offset = Builder.getInt32 (subAlloca->offset );
148+ auto *GEP = Builder.CreateGEP (Builder.getInt8Ty (),
149+ topAllocaBitcast, Offset);
150+ ValueToCast = GEP;
151+ } else {
152+ // If no offset is needed we can directly cast to target type
153+ ValueToCast = Builder.CreateBitCast (topAlloca, subInst->getType ());
154+ }
155+ auto *CastedValue = llvm::cast<Instruction>(
156+ Builder.CreateBitCast (ValueToCast, subInst->getType ()));
157+ ReplacementValue = CastedValue;
158+ }
159+ subInst->replaceAllUsesWith (ReplacementValue);
160+ subInst->eraseFromParent ();
161+
162+ allocasToReplace.insert (allocasToReplace.end (),
163+ subAlloca->nonOverlapingAllocas .begin (),
164+ subAlloca->nonOverlapingAllocas .end ());
165+ }
166+ }
167+
35168char AllocationBasedLivenessAnalysis::ID = 0 ;
36169
37170void AllocationBasedLivenessAnalysis::getAnalysisUsage (llvm::AnalysisUsage& AU) const
@@ -342,20 +475,20 @@ bool AllocationBasedLivenessAnalysis::LivenessData::OverlapsWith(const LivenessD
342475 return true ;
343476
344477 // check lifetime boundaries
345- for (auto & [LD1, LD2] : { std::make_pair (* this , LD), std::make_pair (LD, * this ) })
478+ for (auto & [LD1, LD2] : { std::make_pair (this , & LD), std::make_pair (& LD, this ) })
346479 {
347- for (auto * I : LD1. lifetimeEnds )
480+ for (auto * I : LD1-> lifetimeEnds )
348481 {
349482 // what if LD1 is contained in a single block
350- if (I->getParent () == LD1. lifetimeStart ->getParent ())
483+ if (I->getParent () == LD1-> lifetimeStart ->getParent ())
351484 {
352485 auto * bb = I->getParent ();
353- bool inflow = LD2. bbIn .contains (bb);
354- bool outflow = LD2. bbOut .contains (bb);
355- bool lifetimeStart = LD2. lifetimeStart ->getParent () == bb && LD2. lifetimeStart ->comesBefore (I);
486+ bool inflow = LD2-> bbIn .contains (bb);
487+ bool outflow = LD2-> bbOut .contains (bb);
488+ bool lifetimeStart = LD2-> lifetimeStart ->getParent () == bb && LD2-> lifetimeStart ->comesBefore (I);
356489
357- auto * LD1_lifetimeStart = LD1. lifetimeStart ; // we have to copy LD1.lifetimeStart to avoid clang complaining about LD1 being captured by the lambda
358- bool lifetimeEnd = any_of (LD2. lifetimeEnds , [&](auto * lifetimeEnd) {
490+ auto * LD1_lifetimeStart = LD1-> lifetimeStart ; // we have to copy LD1.lifetimeStart to avoid clang complaining about LD1 being captured by the lambda
491+ bool lifetimeEnd = any_of (LD2-> lifetimeEnds , [&](auto * lifetimeEnd) {
359492 return lifetimeEnd->getParent () == bb && LD1_lifetimeStart->comesBefore (lifetimeEnd);
360493 });
361494
@@ -371,14 +504,13 @@ bool AllocationBasedLivenessAnalysis::LivenessData::OverlapsWith(const LivenessD
371504 if (lifetimeEnd && lifetimeStart)
372505 return true ;
373506 }
374- else if (I->getParent () == LD2. lifetimeStart ->getParent ())
507+ else if (I->getParent () == LD2-> lifetimeStart ->getParent ())
375508 {
376- if (LD2. lifetimeStart ->comesBefore (I))
509+ if (LD2-> lifetimeStart ->comesBefore (I))
377510 return true ;
378511 }
379512 }
380513 }
381-
382514 return false ;
383515}
384516
@@ -407,101 +539,66 @@ void MergeAllocas::getAnalysisUsage(llvm::AnalysisUsage& AU) const
407539 AU.addRequired <AllocationBasedLivenessAnalysis>();
408540}
409541
410- bool MergeAllocas::runOnFunction (Function& F)
411- {
412- if (skipFunction (F)){
542+ bool MergeAllocas::runOnFunction (Function &F) {
543+ if (skipFunction (F)) {
413544 return false ;
414545 }
415546
416547 auto ABLA = getAnalysis<AllocationBasedLivenessAnalysis>().getLivenessInfo ();
417-
418- // we group the allocations by type, then sort them into buckets with nonoverlapping liveranges
419- // can this be generalized into allocas for types of the same size, not only types?
420- using BucketT = SmallVector<std::pair<Instruction*, AllocationBasedLivenessAnalysis::LivenessData*>>;
421- DenseMap<std::tuple<llvm::Type*, uint32_t , uint32_t >, SmallVector<BucketT>> buckets;
422-
423- for (const auto & A : ABLA)
424- {
425- const auto & [currI, currLD] = A;
426- // at this point we assume all I's are alloca instructions
427- // later AllocationBasedLivenessAnalysis will be generalized to any instruction that can allocate something (like allocaterayquery)
428- auto * AI = cast<AllocaInst>(currI);
429-
430- if (!isa<ConstantInt>(AI->getArraySize ()))
431- continue ;
432-
433- auto * AllocatedType = AI->getAllocatedType ();
434- uint32_t IsArray = AllocatedType->isArrayTy ()? 1 : 0 ;
435- auto & perTypeBuckets = buckets[std::make_tuple (
436- (IsArray)? AllocatedType->getArrayElementType () : AllocatedType,
437- IsArray,
438- AI->getAddressSpace ()
439- )];
440-
441- bool found = false ;
442-
443- for (auto & bucket : perTypeBuckets)
444- {
445- if (llvm::none_of (bucket, [&](std::pair<Instruction*, AllocationBasedLivenessAnalysis::LivenessData*> b) { return b.second ->OverlapsWith (*A.second ); }))
446- {
447- bucket.push_back (std::make_pair (currI, currLD));
448- found = true ;
548+ const auto *DataLayout = &F.getParent ()->getDataLayout ();
549+
550+ // We group non-overlapping allocas for replacements.
551+ SmallVector<AllocaInfo *> MergableAllocas;
552+
553+ // First we sort analysis results based on allocation size, from larger to
554+ // smaller.
555+ llvm::sort (ABLA, [&](const auto &a, const auto &b) {
556+ return GetByteSize (cast<AllocaInst>(a.first )->getAllocatedType (),
557+ DataLayout) >
558+ GetByteSize (cast<AllocaInst>(b.first )->getAllocatedType (),
559+ DataLayout);
560+ });
561+
562+ // Reserve space for all alloca infos so we can use pointers to them.
563+ AllAllocasInfos.resize (ABLA.size ());
564+ size_t currentIndex = 0 ;
565+
566+ // We iterate over analysis results collecting non-overlapping allocas.
567+ for (const auto &A : ABLA) {
568+ const auto &[currI, currLD] = A;
569+ AllAllocasInfos[currentIndex] =
570+ GetAllocaInfo (cast<AllocaInst>(currI), currLD, DataLayout);
571+ AllocaInfo &AllocaInfo = AllAllocasInfos[currentIndex];
572+ currentIndex++;
573+
574+ // We check if the current alloca overlaps with any of the previously added.
575+ bool added = false ;
576+ for (auto *MergableAlloca : MergableAllocas) {
577+ if (AllocaInfo.livenessData ->OverlapsWith (*MergableAlloca->livenessData )) {
578+ continue ;
579+ }
580+ added = AddNonOverlappingAlloca (MergableAlloca, &AllocaInfo);
581+ if (added) {
449582 break ;
450583 }
451584 }
452-
453- if (!found)
454- {
455- perTypeBuckets .push_back ({ std::make_pair (currI, currLD) } );
585+ // Alloca overlaps with all of the current ones so it will be added as new
586+ // element.
587+ if (!added && AllocaInfo. allocationSize != 0 ) {
588+ MergableAllocas .push_back (&AllocaInfo );
456589 }
457590 }
458591
459592 bool changed = false ;
460593
461- for (const auto & [allocaType, perTypeBuckets] : buckets)
462- {
463- for (const auto & bucket : perTypeBuckets)
464- {
465- if (bucket.size () == 1 )
466- {
467- continue ;
468- }
469-
470- bool IsArray = std::get<1 >(allocaType);
471- Instruction* firstAlloca = nullptr ;
472- if (IsArray)
473- {
474- firstAlloca = std::max_element (bucket.begin (), bucket.end (), [](const auto & a, const auto & b) {
475- return cast<AllocaInst>(a.first )->getAllocatedType ()->getArrayNumElements () < cast<AllocaInst>(b.first )->getAllocatedType ()->getArrayNumElements ();
476- })->first ;
477- }
478- else
479- {
480- firstAlloca = bucket[0 ].first ;
481- }
482- firstAlloca->moveBefore (F.getEntryBlock ().getFirstNonPHI ());
483- firstAlloca->setName (VALUE_NAME (" MergedAlloca" ));
484- for (const auto & [I, _] : bucket)
485- {
486- if (firstAlloca == I)
487- {
488- continue ;
489- }
490- auto * ReplacementValue = firstAlloca;
491- if (firstAlloca->getType () != I->getType ())
492- {
493- IRBuilder<> Builder (firstAlloca->getParent ());
494- Builder.SetInsertPoint (firstAlloca->getNextNode ());
495- auto *CastedValue = llvm::cast<Instruction>(Builder.CreateBitCast (firstAlloca, I->getType ()));
496- ReplacementValue = CastedValue;
497- };
498- I->replaceAllUsesWith (ReplacementValue);
499- I->eraseFromParent ();
500- }
501-
502- changed = true ;
594+ // Replace alloca usages
595+ for (auto *MergableAlloca : MergableAllocas) {
596+ if (MergableAlloca->nonOverlapingAllocas .empty ()) {
597+ continue ;
503598 }
599+ changed = true ;
600+ ReplaceAllocas (*MergableAlloca, F);
504601 }
505602
506603 return changed;
507- }
604+ }
0 commit comments