@@ -30,12 +30,20 @@ This transformation is not safe in general. It can be applied only in those case
3030-We need to know that we don't access out of bound sample index
3131************************************************************************/
3232class MCSOptimization : public FunctionPass , public InstVisitor <MCSOptimization> {
33+ private:
34+ struct LdmcsWork {
35+ LdmcsInstrinsic *ldMcs;
36+ LdmsInstrinsic *firstUse;
37+ llvm::SmallVector<LdmsInstrinsic *, 8 > ldmsInstsToMove;
38+ };
39+
3340public:
3441 MCSOptimization () : FunctionPass(ID) {}
3542 bool runOnFunction (Function &F);
3643 void visitCallInst (llvm::CallInst &I);
3744 void getAnalysisUsage (llvm::AnalysisUsage &AU) const { AU.addRequired <CodeGenContextWrapper>(); }
3845 virtual llvm::StringRef getPassName () const { return " MCSOptimization" ; }
46+ void ProcessLdmcsAndUsersInstrinsic (LdmcsWork &);
3947
4048 static char ID;
4149 bool m_changed = false ;
@@ -50,6 +58,7 @@ class MCSOptimization : public FunctionPass, public InstVisitor<MCSOptimization>
5058 }
5159 return false ;
5260 }
61+ llvm::SmallVector<LdmcsWork> m_candidates;
5362
5463protected:
5564};
@@ -63,6 +72,9 @@ bool MCSOptimization::runOnFunction(Function &F) {
6372 }
6473 m_changed = false ;
6574 visit (F);
75+ for (auto &workItem : m_candidates) {
76+ ProcessLdmcsAndUsersInstrinsic (workItem);
77+ }
6678 return m_changed;
6779}
6880
@@ -94,6 +106,7 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
94106 const unsigned long long resourceViewMcsMaskElement =
95107 ctx->getModuleMetaData ()->m_ShaderResourceViewMcsMask [shaderResourceViewMcsMaskIndex];
96108 const unsigned int resourceViewMaskTextureBit = textureIndex % BITS_PER_QWORD;
109+
97110 IGC_ASSERT_MESSAGE (textureIndex <= 127 , " Texture index is incorrectly extracted from ld_mcs" );
98111
99112 unsigned long long resultBit = resourceViewMcsMaskElement >> resourceViewMaskTextureBit;
@@ -114,8 +127,9 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
114127 }
115128
116129 if (EEI != nullptr ) {
117- if (EEI->hasOneUse ())
130+ if (EEI->hasOneUse ()) {
118131 return ; // only one use of EEI -- noOptimization
132+ }
119133
120134 LdmsInstrinsic *firstUse = nullptr ;
121135
@@ -129,8 +143,9 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
129143 }
130144 }
131145
132- if (!firstUse)
146+ if (!firstUse) {
133147 return ;
148+ }
134149
135150 // collect all blocks where this EEI insts is getting used
136151 std::set<BasicBlock *> useBlocks;
@@ -148,77 +163,94 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
148163 // iterate over useBlocks.
149164 // For each useBlock, collect all the ldms insts present within the use block corresponding to this EEI
150165 for (auto BB : useBlocks) {
151- std::vector<LdmsInstrinsic *> ldmsInstsToMove;
152- std::vector<LdmsInstrinsic *> ldmsInstsToClub;
166+ llvm::SmallVector<LdmsInstrinsic *, 8 > ldmsInstsToMove;
153167 for (auto inst = BB->begin (); inst != BB->end (); inst++) {
154168 if (LdmsInstrinsic *ldmsIntr = dyn_cast<LdmsInstrinsic>(inst)) {
155169 if (ldmsIntr->getOperand (1 ) == dyn_cast<Value>(EEI)) {
156- if (ldmsIntr == firstUse)
170+ if (ldmsIntr == firstUse) {
157171 continue ; // don't move the first use into the then block , need it for phi Node
172+ }
158173 ldmsInstsToMove.push_back (ldmsIntr);
159174 }
160175 }
161176 }
162177
163- // this is added because clubbing all ld2dms into a single then block
164- // increases register pressure and causes spilling
165- int instClubThreshold =
166- IGC_GET_FLAG_VALUE (ld2dmsInstsClubbingThreshold); // # ld2dms insts that can be moved into the then block
167- // int instClubThreshold = 2;
168- bool allInstsWillBeMoved = false ;
169-
170- while (!allInstsWillBeMoved) {
171- ldmsInstsToClub.clear ();
172- // Threshold is more than # of insts that are to be moved. So move all.
173- if (instClubThreshold >= static_cast <int >(ldmsInstsToMove.size ())) {
174- ldmsInstsToClub = ldmsInstsToMove;
175- allInstsWillBeMoved = true ;
176- } else {
177- // pick the first 0-threshold # of insts and move them only
178- for (int i = 0 ; i < instClubThreshold; i++) {
179- ldmsInstsToClub.push_back (ldmsInstsToMove[i]);
180- }
181- ldmsInstsToMove.erase (ldmsInstsToMove.begin (), ldmsInstsToMove.begin () + instClubThreshold);
182- }
178+ LdmcsWork work = {ldMcs, firstUse, ldmsInstsToMove};
179+ if (IGC_IS_FLAG_ENABLED (MCSOptTwoStagesMode)) {
180+ m_candidates.emplace_back (work);
181+ continue ;
182+ }
183+ ProcessLdmcsAndUsersInstrinsic (work);
184+ }
185+ }
186+ }
187+ }
183188
184- // split the block into a new then block
185- BasicBlock *ldmsUseBB = nullptr ; // second entry to the phi node
186- BasicBlock *thenBlock = nullptr ;
187- IGCLLVM::TerminatorInst *thenBlockTerminator = nullptr ;
188- if (ldmsInstsToClub.size () != 0 ) {
189- LdmsInstrinsic *ldmsUse = ldmsInstsToClub[0 ];
190- ldmsUseBB = ldmsUse->getParent ();
191- IRB.SetInsertPoint (ldmsUse);
192- Value *ValueisMCSNotZero = nullptr ;
193- for (unsigned int i = 0 ; i < ldmsUse->getNumMcsOperands (); i++) {
194- Value *mcs = firstUse->getMcsOperand (i);
195- Value *cnd1 = IRB.CreateICmpNE (mcs, ConstantInt::get (mcs->getType (), 0 ));
196- if (ValueisMCSNotZero == nullptr ) {
197- ValueisMCSNotZero = cnd1;
198- } else {
199- ValueisMCSNotZero = IRB.CreateOr (ValueisMCSNotZero, cnd1);
200- }
201- }
202- thenBlockTerminator = SplitBlockAndInsertIfThen (ValueisMCSNotZero, ldmsUse, false );
203- thenBlock = thenBlockTerminator->getParent ();
204- }
189+ void MCSOptimization::ProcessLdmcsAndUsersInstrinsic (LdmcsWork &work) {
190+ auto *ldMcs = work.ldMcs ;
191+ auto *firstUse = work.firstUse ;
192+ auto &ldmsInstsToMove = work.ldmsInstsToMove ;
193+ Function *F = ldMcs->getParent ()->getParent ();
194+ IGCIRBuilder<> IRB (F->getContext ());
195+ // this is added because clubbing all ld2dms into a single then block
196+ // increases register pressure and causes spilling
197+ int instClubThreshold =
198+ IGC_GET_FLAG_VALUE (ld2dmsInstsClubbingThreshold); // # ld2dms insts that can be moved into the then block
199+ // int instClubThreshold = 2;
200+ bool allInstsWillBeMoved = false ;
205201
206- // Move the collected ldms insts into the then block and insert their phi nodes in the successor of the then
207- // block
208- if (thenBlockTerminator) {
209- for (auto instToMove : ldmsInstsToClub) {
210- instToMove->moveBefore (thenBlockTerminator);
211- IRB.SetInsertPoint (&*(thenBlockTerminator->getSuccessor (0 )->begin ()));
212- PHINode *PN = IRB.CreatePHI (instToMove->getType (), 2 );
213- instToMove->replaceAllUsesWith (PN);
214- PN->addIncoming (instToMove, thenBlock);
215- PN->addIncoming (firstUse, ldmsUseBB);
216- m_changed = true ;
217- }
218- }
202+ bool splitAfterFirstUse = IGC_GET_FLAG_VALUE (Splitld2dmsAfterFirst);
203+
204+ llvm::SmallVector<LdmsInstrinsic *, 8 > ldmsInstsToClub;
205+ while (!allInstsWillBeMoved) {
206+ ldmsInstsToClub.clear ();
207+ // Threshold is more than # of insts that are to be moved. So move all.
208+ if (instClubThreshold >= static_cast <int >(ldmsInstsToMove.size ())) {
209+ ldmsInstsToClub = ldmsInstsToMove;
210+ allInstsWillBeMoved = true ;
211+ } else {
212+ // pick the first 0-threshold # of insts and move them only
213+ for (int i = 0 ; i < instClubThreshold; i++) {
214+ ldmsInstsToClub.push_back (ldmsInstsToMove[i]);
215+ }
216+ ldmsInstsToMove.erase (ldmsInstsToMove.begin (), ldmsInstsToMove.begin () + instClubThreshold);
217+ }
218+
219+ // split the block into a new then block
220+ BasicBlock *ldmsUseBB = nullptr ; // second entry to the phi node
221+ BasicBlock *thenBlock = nullptr ;
222+ IGCLLVM::TerminatorInst *thenBlockTerminator = nullptr ;
223+ if (ldmsInstsToClub.size () != 0 ) {
224+ LdmsInstrinsic *ldmsUse = splitAfterFirstUse ? firstUse : ldmsInstsToClub[0 ];
225+ Instruction *splitInsertPoint = splitAfterFirstUse ? ldmsUse->getNextNode () : ldmsUse;
226+ ldmsUseBB = splitInsertPoint->getParent ();
227+ IRB.SetInsertPoint (splitInsertPoint);
228+ Value *ValueisMCSNotZero = nullptr ;
229+ for (unsigned int i = 0 ; i < ldmsUse->getNumMcsOperands (); i++) {
230+ Value *mcs = firstUse->getMcsOperand (i);
231+ Value *cnd1 = IRB.CreateICmpNE (mcs, ConstantInt::get (mcs->getType (), 0 ));
232+ if (ValueisMCSNotZero == nullptr ) {
233+ ValueisMCSNotZero = cnd1;
234+ } else {
235+ ValueisMCSNotZero = IRB.CreateOr (ValueisMCSNotZero, cnd1);
219236 }
220237 }
221- m_changed = true ;
238+ thenBlockTerminator = SplitBlockAndInsertIfThen (ValueisMCSNotZero, splitInsertPoint, false );
239+ thenBlock = thenBlockTerminator->getParent ();
240+ }
241+
242+ // Move the collected ldms insts into the then block and insert their phi nodes in the successor of the then
243+ // block
244+ if (thenBlockTerminator) {
245+ for (auto instToMove : ldmsInstsToClub) {
246+ instToMove->moveBefore (thenBlockTerminator);
247+ IRB.SetInsertPoint (&*(thenBlockTerminator->getSuccessor (0 )->begin ()));
248+ PHINode *PN = IRB.CreatePHI (instToMove->getType (), 2 );
249+ instToMove->replaceAllUsesWith (PN);
250+ PN->addIncoming (instToMove, thenBlock);
251+ PN->addIncoming (firstUse, ldmsUseBB);
252+ m_changed = true ;
253+ }
222254 }
223255 }
224256}
0 commit comments