Skip to content

Commit b54358e

Browse files
bgajdaINTC authored and igcbot committed
Improve code around MCSOptimization.
Code improvements.
1 parent 02e8acf commit b54358e

File tree

2 files changed

+94
-60
lines changed

2 files changed

+94
-60
lines changed

IGC/Compiler/Optimizer/MCSOptimization.cpp

Lines changed: 92 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,20 @@ This transformation is not safe in general. It can be applied only in those case
3030
-We need to know that we don't access out of bound sample index
3131
************************************************************************/
3232
class MCSOptimization : public FunctionPass, public InstVisitor<MCSOptimization> {
33+
private:
34+
struct LdmcsWork {
35+
LdmcsInstrinsic *ldMcs;
36+
LdmsInstrinsic *firstUse;
37+
llvm::SmallVector<LdmsInstrinsic *, 8> ldmsInstsToMove;
38+
};
39+
3340
public:
3441
MCSOptimization() : FunctionPass(ID) {}
3542
bool runOnFunction(Function &F);
3643
void visitCallInst(llvm::CallInst &I);
3744
void getAnalysisUsage(llvm::AnalysisUsage &AU) const { AU.addRequired<CodeGenContextWrapper>(); }
3845
virtual llvm::StringRef getPassName() const { return "MCSOptimization"; }
46+
void ProcessLdmcsAndUsersInstrinsic(LdmcsWork &);
3947

4048
static char ID;
4149
bool m_changed = false;
@@ -50,6 +58,7 @@ class MCSOptimization : public FunctionPass, public InstVisitor<MCSOptimization>
5058
}
5159
return false;
5260
}
61+
llvm::SmallVector<LdmcsWork> m_candidates;
5362

5463
protected:
5564
};
@@ -63,6 +72,9 @@ bool MCSOptimization::runOnFunction(Function &F) {
6372
}
6473
m_changed = false;
6574
visit(F);
75+
for (auto &workItem : m_candidates) {
76+
ProcessLdmcsAndUsersInstrinsic(workItem);
77+
}
6678
return m_changed;
6779
}
6880

@@ -94,6 +106,7 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
94106
const unsigned long long resourceViewMcsMaskElement =
95107
ctx->getModuleMetaData()->m_ShaderResourceViewMcsMask[shaderResourceViewMcsMaskIndex];
96108
const unsigned int resourceViewMaskTextureBit = textureIndex % BITS_PER_QWORD;
109+
97110
IGC_ASSERT_MESSAGE(textureIndex <= 127, "Texture index is incorrectly extracted from ld_mcs");
98111

99112
unsigned long long resultBit = resourceViewMcsMaskElement >> resourceViewMaskTextureBit;
@@ -114,8 +127,9 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
114127
}
115128

116129
if (EEI != nullptr) {
117-
if (EEI->hasOneUse())
130+
if (EEI->hasOneUse()) {
118131
return; // only one use of EEI -- noOptimization
132+
}
119133

120134
LdmsInstrinsic *firstUse = nullptr;
121135

@@ -129,8 +143,9 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
129143
}
130144
}
131145

132-
if (!firstUse)
146+
if (!firstUse) {
133147
return;
148+
}
134149

135150
// collect all blocks where this EEI insts is getting used
136151
std::set<BasicBlock *> useBlocks;
@@ -148,77 +163,94 @@ void MCSOptimization::visitCallInst(llvm::CallInst &I) {
148163
// iterate over useBlocks.
149164
// For each useBlock, collect all the ldms insts present within the use block corresponding to this EEI
150165
for (auto BB : useBlocks) {
151-
std::vector<LdmsInstrinsic *> ldmsInstsToMove;
152-
std::vector<LdmsInstrinsic *> ldmsInstsToClub;
166+
llvm::SmallVector<LdmsInstrinsic *, 8> ldmsInstsToMove;
153167
for (auto inst = BB->begin(); inst != BB->end(); inst++) {
154168
if (LdmsInstrinsic *ldmsIntr = dyn_cast<LdmsInstrinsic>(inst)) {
155169
if (ldmsIntr->getOperand(1) == dyn_cast<Value>(EEI)) {
156-
if (ldmsIntr == firstUse)
170+
if (ldmsIntr == firstUse) {
157171
continue; // don't move the first use into the then block , need it for phi Node
172+
}
158173
ldmsInstsToMove.push_back(ldmsIntr);
159174
}
160175
}
161176
}
162177

163-
// this is added because clubbing all ld2dms into a single then block
164-
// increases register pressure and causes spilling
165-
int instClubThreshold =
166-
IGC_GET_FLAG_VALUE(ld2dmsInstsClubbingThreshold); // # ld2dms insts that can be moved into the then block
167-
// int instClubThreshold = 2;
168-
bool allInstsWillBeMoved = false;
169-
170-
while (!allInstsWillBeMoved) {
171-
ldmsInstsToClub.clear();
172-
// Threshold is more than # of insts that are to be moved. So move all.
173-
if (instClubThreshold >= static_cast<int>(ldmsInstsToMove.size())) {
174-
ldmsInstsToClub = ldmsInstsToMove;
175-
allInstsWillBeMoved = true;
176-
} else {
177-
// pick the first 0-threshold # of insts and move them only
178-
for (int i = 0; i < instClubThreshold; i++) {
179-
ldmsInstsToClub.push_back(ldmsInstsToMove[i]);
180-
}
181-
ldmsInstsToMove.erase(ldmsInstsToMove.begin(), ldmsInstsToMove.begin() + instClubThreshold);
182-
}
178+
LdmcsWork work = {ldMcs, firstUse, ldmsInstsToMove};
179+
if (IGC_IS_FLAG_ENABLED(MCSOptTwoStagesMode)) {
180+
m_candidates.emplace_back(work);
181+
continue;
182+
}
183+
ProcessLdmcsAndUsersInstrinsic(work);
184+
}
185+
}
186+
}
187+
}
183188

184-
// split the block into a new then block
185-
BasicBlock *ldmsUseBB = nullptr; // second entry to the phi node
186-
BasicBlock *thenBlock = nullptr;
187-
IGCLLVM::TerminatorInst *thenBlockTerminator = nullptr;
188-
if (ldmsInstsToClub.size() != 0) {
189-
LdmsInstrinsic *ldmsUse = ldmsInstsToClub[0];
190-
ldmsUseBB = ldmsUse->getParent();
191-
IRB.SetInsertPoint(ldmsUse);
192-
Value *ValueisMCSNotZero = nullptr;
193-
for (unsigned int i = 0; i < ldmsUse->getNumMcsOperands(); i++) {
194-
Value *mcs = firstUse->getMcsOperand(i);
195-
Value *cnd1 = IRB.CreateICmpNE(mcs, ConstantInt::get(mcs->getType(), 0));
196-
if (ValueisMCSNotZero == nullptr) {
197-
ValueisMCSNotZero = cnd1;
198-
} else {
199-
ValueisMCSNotZero = IRB.CreateOr(ValueisMCSNotZero, cnd1);
200-
}
201-
}
202-
thenBlockTerminator = SplitBlockAndInsertIfThen(ValueisMCSNotZero, ldmsUse, false);
203-
thenBlock = thenBlockTerminator->getParent();
204-
}
189+
void MCSOptimization::ProcessLdmcsAndUsersInstrinsic(LdmcsWork &work) {
190+
auto *ldMcs = work.ldMcs;
191+
auto *firstUse = work.firstUse;
192+
auto &ldmsInstsToMove = work.ldmsInstsToMove;
193+
Function *F = ldMcs->getParent()->getParent();
194+
IGCIRBuilder<> IRB(F->getContext());
195+
// this is added because clubbing all ld2dms into a single then block
196+
// increases register pressure and causes spilling
197+
int instClubThreshold =
198+
IGC_GET_FLAG_VALUE(ld2dmsInstsClubbingThreshold); // # ld2dms insts that can be moved into the then block
199+
// int instClubThreshold = 2;
200+
bool allInstsWillBeMoved = false;
205201

206-
// Move the collected ldms insts into the then block and insert their phi nodes in the successor of the then
207-
// block
208-
if (thenBlockTerminator) {
209-
for (auto instToMove : ldmsInstsToClub) {
210-
instToMove->moveBefore(thenBlockTerminator);
211-
IRB.SetInsertPoint(&*(thenBlockTerminator->getSuccessor(0)->begin()));
212-
PHINode *PN = IRB.CreatePHI(instToMove->getType(), 2);
213-
instToMove->replaceAllUsesWith(PN);
214-
PN->addIncoming(instToMove, thenBlock);
215-
PN->addIncoming(firstUse, ldmsUseBB);
216-
m_changed = true;
217-
}
218-
}
202+
bool splitAfterFirstUse = IGC_GET_FLAG_VALUE(Splitld2dmsAfterFirst);
203+
204+
llvm::SmallVector<LdmsInstrinsic *, 8> ldmsInstsToClub;
205+
while (!allInstsWillBeMoved) {
206+
ldmsInstsToClub.clear();
207+
// Threshold is more than # of insts that are to be moved. So move all.
208+
if (instClubThreshold >= static_cast<int>(ldmsInstsToMove.size())) {
209+
ldmsInstsToClub = ldmsInstsToMove;
210+
allInstsWillBeMoved = true;
211+
} else {
212+
// pick the first 0-threshold # of insts and move them only
213+
for (int i = 0; i < instClubThreshold; i++) {
214+
ldmsInstsToClub.push_back(ldmsInstsToMove[i]);
215+
}
216+
ldmsInstsToMove.erase(ldmsInstsToMove.begin(), ldmsInstsToMove.begin() + instClubThreshold);
217+
}
218+
219+
// split the block into a new then block
220+
BasicBlock *ldmsUseBB = nullptr; // second entry to the phi node
221+
BasicBlock *thenBlock = nullptr;
222+
IGCLLVM::TerminatorInst *thenBlockTerminator = nullptr;
223+
if (ldmsInstsToClub.size() != 0) {
224+
LdmsInstrinsic *ldmsUse = splitAfterFirstUse ? firstUse : ldmsInstsToClub[0];
225+
Instruction *splitInsertPoint = splitAfterFirstUse ? ldmsUse->getNextNode() : ldmsUse;
226+
ldmsUseBB = splitInsertPoint->getParent();
227+
IRB.SetInsertPoint(splitInsertPoint);
228+
Value *ValueisMCSNotZero = nullptr;
229+
for (unsigned int i = 0; i < ldmsUse->getNumMcsOperands(); i++) {
230+
Value *mcs = firstUse->getMcsOperand(i);
231+
Value *cnd1 = IRB.CreateICmpNE(mcs, ConstantInt::get(mcs->getType(), 0));
232+
if (ValueisMCSNotZero == nullptr) {
233+
ValueisMCSNotZero = cnd1;
234+
} else {
235+
ValueisMCSNotZero = IRB.CreateOr(ValueisMCSNotZero, cnd1);
219236
}
220237
}
221-
m_changed = true;
238+
thenBlockTerminator = SplitBlockAndInsertIfThen(ValueisMCSNotZero, splitInsertPoint, false);
239+
thenBlock = thenBlockTerminator->getParent();
240+
}
241+
242+
// Move the collected ldms insts into the then block and insert their phi nodes in the successor of the then
243+
// block
244+
if (thenBlockTerminator) {
245+
for (auto instToMove : ldmsInstsToClub) {
246+
instToMove->moveBefore(thenBlockTerminator);
247+
IRB.SetInsertPoint(&*(thenBlockTerminator->getSuccessor(0)->begin()));
248+
PHINode *PN = IRB.CreatePHI(instToMove->getType(), 2);
249+
instToMove->replaceAllUsesWith(PN);
250+
PN->addIncoming(instToMove, thenBlock);
251+
PN->addIncoming(firstUse, ldmsUseBB);
252+
m_changed = true;
253+
}
222254
}
223255
}
224256
}

IGC/common/igc_flags.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,7 @@ DECLARE_IGC_REGKEY(bool, RemoveUnusedSLM, true, "Remove SLM that are not used",
508508
DECLARE_IGC_REGKEY(bool, RemoveUnusedTGMFence, false, "Remove TGM Fences that are not used/read", false)
509509
DECLARE_IGC_REGKEY(bool, EnableCustomLoopVersioning, true, "Enable IGC to do custom loop versioning", false)
510510
DECLARE_IGC_REGKEY(bool, DisableMCSOpt, false, "Disable IGC to run MCS optimization", false)
511+
DECLARE_IGC_REGKEY(bool, MCSOptTwoStagesMode, false, "MCSOptimization gather all candidates than process", false)
511512
DECLARE_IGC_REGKEY(bool, DisableGatingSimilarSamples, false, "Disable Gating of similar sample instructions", false)
512513
DECLARE_IGC_REGKEY(bool, EnableSoftwareStencil, false, "Enable software stencil for PS.", false)
513514
DECLARE_IGC_REGKEY(bool, EnableInterpreterPatternMatching, false,
@@ -717,6 +718,7 @@ DECLARE_IGC_REGKEY(DWORD, RouteByLodHint, 0, "An integer offset addon to route t
717718
DECLARE_IGC_REGKEY(bool, EnableTrivialEmulateSinCos, false, "Enable Emulation for Sine and Cosine instructions", false)
718719
DECLARE_IGC_REGKEY(DWORD, ld2dmsInstsClubbingThreshold, 3,
719720
"Do not club more than these ld2dms insts into the new BB during MCSOpt", false)
721+
DECLARE_IGC_REGKEY(bool, Splitld2dmsAfterFirst, false, "Instead of splitting after second ld2dms message, split after first to avoid waiting", false)
720722
DECLARE_IGC_REGKEY(DWORD, ForcePerThreadPrivateMemorySize, 0,
721723
"Useful for ensuring a certain amount of private memory when doing a shader override.", true)
722724
DECLARE_IGC_REGKEY(DWORD, RetryManagerFirstStateId, 0,

0 commit comments

Comments
 (0)