Skip to content

Commit 7f4d215

Browse files
stefan-iligcbot
authored andcommitted
Avoid recursion in isRegionInvariant
Switch to stack in isRegionInvariant.
1 parent d930d20 commit 7f4d215

File tree

2 files changed

+79
-77
lines changed

2 files changed

+79
-77
lines changed

IGC/Compiler/CISACodeGen/WIAnalysis.cpp

Lines changed: 75 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ SPDX-License-Identifier: MIT
1616
#include "common/igc_regkeys.hpp"
1717
#include "GenISAIntrinsics/GenIntrinsicInst.h"
1818
#include "common/LLVMWarningsPush.hpp"
19+
#include <llvm/ADT/SmallVector.h>
1920
#include <llvm/IR/Function.h>
2021
#include <llvm/IR/CFG.h>
2122
#include <llvm/Support/CommandLine.h>
@@ -25,6 +26,7 @@ SPDX-License-Identifier: MIT
2526

2627
#include <string>
2728
#include <sstream>
29+
#include <cstdint>
2830
#include "Probe/Assertion.h"
2931

3032
using namespace llvm;
@@ -309,10 +311,7 @@ bool WIAnalysisRunner::run()
309311
m_depMap.Initialize(m_TT);
310312
m_TT->RegisterListener(&m_depMap);
311313

312-
m_changed1.clear();
313-
m_changed2.clear();
314-
m_pChangedNew = &m_changed1;
315-
m_pChangedOld = &m_changed2;
314+
m_pChangedNew.clear();
316315
m_ctrlBranches.clear();
317316

318317
m_storeDepMap.clear();
@@ -326,13 +325,10 @@ bool WIAnalysisRunner::run()
326325
// Compute the first iteration of the WI-dep according to ordering
327326
// instructions this ordering is generally good (as it usually correlates
328327
// well with dominance).
329-
inst_iterator it = inst_begin(F);
330-
inst_iterator e = inst_end(F);
331-
for (; it != e; ++it)
328+
for (auto& inst: llvm::instructions(F))
332329
{
333-
calculate_dep(&*it);
330+
calculate_dep(&inst);
334331
}
335-
336332
// Recursively check if WI-dep changes and if so recalculates
337333
// the WI-dep and marks the users for re-checking.
338334
// This procedure is guaranteed to converge since WI-dep can only
@@ -352,13 +348,13 @@ bool WIAnalysisRunner::run()
352348
const Value* use = (*UI);
353349
if (!visited.count(use) && use->getType() == V->getType())
354350
{
355-
if (auto INS = dyn_cast<InsertElementInst>(use))
351+
if (auto* INS = dyn_cast<InsertElementInst>(use))
356352
{
357353
if (!isUniform(use))
358354
m_depMap.SetAttribute(INS, WIAnalysis::UNIFORM_THREAD);
359355
m_forcedUniforms.push_back(use);
360356
}
361-
else if (auto PHI = dyn_cast<PHINode>(use))
357+
else if (auto* PHI = dyn_cast<PHINode>(use))
362358
{
363359
if (!isUniform(use))
364360
m_depMap.SetAttribute(PHI, WIAnalysis::UNIFORM_THREAD);
@@ -400,23 +396,22 @@ bool WIAnalysis::runOnFunction(Function& F)
400396

401397
void WIAnalysisRunner::updateDeps()
402398
{
399+
std::vector<const llvm::Value*> m_pChangedOld;
403400
// As lonst as we have values to update
404-
while (!m_pChangedNew->empty())
401+
while (!m_pChangedNew.empty())
405402
{
406403
// swap between changedSet pointers - recheck the newChanged(now old)
407404
std::swap(m_pChangedNew, m_pChangedOld);
408405
// clear the newChanged set so it will be filled with the users of
409-
// instruction which their WI-dep canged during the current iteration
410-
m_pChangedNew->clear();
406+
// instruction which their WI-dep changed during the current iteration
407+
m_pChangedNew.clear();
411408

412409
// update all changed values
413-
std::vector<const Value*>::iterator it = m_pChangedOld->begin();
414-
std::vector<const Value*>::iterator e = m_pChangedOld->end();
415-
for (; it != e; ++it)
410+
for (const auto* val: m_pChangedOld)
416411
{
417412
// remove first instruction
418413
// calculate its new dependencey value
419-
calculate_dep(*it);
414+
calculate_dep(val);
420415
}
421416
}
422417
}
@@ -437,7 +432,7 @@ bool WIAnalysisRunner::isInstructionSimple(const Instruction* inst)
437432
{
438433
return true;
439434
}
440-
if (IsMathIntrinsic(GetOpCode((Instruction*)inst)))
435+
if (IsMathIntrinsic(GetOpCode(inst)))
441436
{
442437
return true;
443438
}
@@ -607,7 +602,7 @@ bool WIAnalysis::insideWorkgroupDivergentCF(const Value* val) const
607602

608603
WIAnalysis::WIDependancy WIAnalysisRunner::whichDepend(const Value* val) const
609604
{
610-
IGC_ASSERT_MESSAGE(m_pChangedNew->empty(), "set should be empty before query");
605+
IGC_ASSERT_MESSAGE(m_pChangedNew.empty(), "set should be empty before query");
611606
IGC_ASSERT_MESSAGE(nullptr != val, "Bad value");
612607
if (isa<Constant>(val))
613608
{
@@ -729,7 +724,7 @@ void WIAnalysisRunner::calculate_dep(const Value* val)
729724
// is not uniform ?
730725
IGC_ASSERT_MESSAGE(isa<Instruction>(val), "Could we reach here with non instruction value?");
731726

732-
const Instruction* const inst = dyn_cast<Instruction>(val);
727+
const auto* const inst = dyn_cast<Instruction>(val);
733728
IGC_ASSERT_MESSAGE(nullptr != inst, "This Value is not an Instruction");
734729
if (inst)
735730
{
@@ -794,7 +789,7 @@ void WIAnalysisRunner::calculate_dep(const Value* val)
794789
// This code could be extended further depending on requirements.
795790
if (inst->getOpcode() == Instruction::AShr)
796791
{
797-
BinaryOperator* op0 = dyn_cast<BinaryOperator>(inst->getOperand(0));
792+
auto* op0 = dyn_cast<BinaryOperator>(inst->getOperand(0));
798793
if (op0 && op0->getOpcode() == Instruction::Add &&
799794
!hasDependency(op0->getOperand(1)))
800795
{
@@ -885,40 +880,53 @@ void WIAnalysisRunner::calculate_dep(const Value* val)
885880
// divergent branch, trigger updates due to control-dependence
886881
if (inst->isTerminator() && dep != WIAnalysis::UNIFORM_GLOBAL)
887882
{
888-
update_cf_dep(dyn_cast<IGCLLVM::TerminatorInst>(inst));
883+
update_cf_dep(cast<IGCLLVM::TerminatorInst>(inst));
889884
}
890885
}
891886
}
892887
}
893888

894-
/// Checks, to a limited depth, whether \p defi depends only on values
/// defined outside the influence region of the branch described by
/// \p brInfo.
///
/// The operand tree of \p defi is walked depth-first with an explicit stack
/// instead of recursion. Operands that are not instructions, or whose
/// defining block is not in brInfo->influence_region, are skipped. Operands
/// defined inside the region are descended into.
///
/// \param defi    instruction whose operand tree is inspected.
/// \param brInfo  branch information; only influence_region is read.
/// \return false if a PHI node is reached or if the walk would need more
///         than MAX_DEPTH nested instructions (the answer is then unknown,
///         so the conservative result is returned); true if the whole
///         reachable operand tree was inspected without hitting either case.
bool WIAnalysisRunner::isRegionInvariant(const llvm::Instruction* defi, BranchInfo* brInfo)
{
    // Maximum number of nested instructions (defi included) that may be on
    // the stack at once. This matches the former recursive implementation,
    // which accepted levels 0..3 and gave up at level 4.
    constexpr unsigned MAX_DEPTH = 4;

    // One frame of the walk: an instruction plus the index of its next
    // operand to inspect. The index is a full 'unsigned' so it cannot wrap
    // for instructions with more than 255 operands.
    struct RegionOperand
    {
        const llvm::Instruction* inst;
        unsigned operandNum;
    };

    llvm::SmallVector<RegionOperand, MAX_DEPTH> operands;
    operands.push_back({ defi, 0 });

    while (!operands.empty())
    {
        auto& rop = operands.back();
        if (isa<PHINode>(rop.inst))
        {
            return false;
        }

        // All operands of the top instruction have been inspected:
        // resume with its parent frame.
        if (rop.operandNum >= rop.inst->getNumOperands())
        {
            operands.pop_back();
            continue;
        }

        Value* op = rop.inst->getOperand(rop.operandNum);
        // Advance before a possible push_back below; 'rop' must not be
        // touched once the stack has been modified.
        ++rop.operandNum;

        auto* srci = dyn_cast<Instruction>(op);
        if (!srci || !brInfo->influence_region.count(srci->getParent()))
        {
            // Not an instruction, or defined outside the region:
            // go on to check the next operand.
            continue;
        }

        // Descending would exceed the depth limit.
        if (operands.size() >= MAX_DEPTH)
        {
            return false;
        }
        operands.push_back({ srci, 0 });
    }

    return true;
}
924932

@@ -939,7 +947,7 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
939947
IGC_ASSERT(hasDependency(inst));
940948
WIBaseClass::WIDependancy instDep = getDependency(inst);
941949

942-
BasicBlock* blk = (BasicBlock*)(inst->getParent());
950+
auto* blk = (BasicBlock*)(inst->getParent());
943951
BasicBlock* ipd = PDT->getNode(blk)->getIDom()->getBlock();
944952
// a branch can have NULL immediate post-dominator when a function
945953
// has multiple exits in llvm-ir
@@ -977,13 +985,13 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
977985
IGC_ASSERT(!DT->isReachableFromEntry(PJ));
978986
continue;
979987
}
980-
auto PJDom = PJNode->getIDom()->getBlock();
988+
auto* PJDom = PJNode->getIDom()->getBlock();
981989

982990
// If both partial-join and its IDom are in partial-join region
983991
// there are cases in which phi-nodes in partial-joins are not
984992
// relevant to the cbr under the investigation
985-
auto LoopA = LI->getLoopFor(PJDom);
986-
auto LoopB = LI->getLoopFor(PJ);
993+
auto* LoopA = LI->getLoopFor(PJDom);
994+
auto* LoopB = LI->getLoopFor(PJ);
987995
if (br_info.partial_joins.count(PJDom))
988996
{
989997
// both PJ and its IDom are outside the CBR loop
@@ -1030,15 +1038,15 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
10301038
// because it might need to be RANDOM.
10311039
auto it = m_storeDepMap.find(st);
10321040
if (it != m_storeDepMap.end())
1033-
m_pChangedNew->push_back(it->second);
1041+
m_pChangedNew.push_back(it->second);
10341042
}
10351043

10361044
// This is an optimization that tries to detect instruction
10371045
// not really affected by control-flow divergency because
10381046
// all the sources are outside the region.
10391047
// However this is only as good as we can get because we
10401048
// only search limited depth
1041-
if (isRegionInvariant(defi, &br_info, 0))
1049+
if (isRegionInvariant(defi, &br_info))
10421050
{
10431051
continue;
10441052
}
@@ -1053,10 +1061,10 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
10531061
Value::use_iterator use_e = defi->use_end();
10541062
for (; use_it != use_e; ++use_it)
10551063
{
1056-
Instruction* user = dyn_cast<Instruction>((*use_it).getUser());
1064+
auto* user = dyn_cast<Instruction>((*use_it).getUser());
10571065
IGC_ASSERT(user);
10581066
BasicBlock* user_blk = user->getParent();
1059-
PHINode* phi = dyn_cast<PHINode>(user);
1067+
auto* phi = dyn_cast<PHINode>(user);
10601068
if (phi)
10611069
{
10621070
// another place we assume all critical edges have been
@@ -1068,8 +1076,8 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
10681076
// local def-use, not related to control-dependence
10691077
continue; // skip
10701078
}
1071-
auto DefLoop = LI->getLoopFor(def_blk);
1072-
auto UseLoop = LI->getLoopFor(user_blk);
1079+
auto* DefLoop = LI->getLoopFor(def_blk);
1080+
auto* UseLoop = LI->getLoopFor(user_blk);
10731081
if (user_blk == br_info.full_join ||
10741082
!br_info.influence_region.count(user_blk) ||
10751083
(br_info.partial_joins.count(user_blk) &&
@@ -1098,7 +1106,7 @@ void WIAnalysisRunner::updatePHIDepAtJoin(BasicBlock* blk, BranchInfo* brInfo)
10981106
for (BasicBlock::iterator I = blk->begin(), E = blk->end(); I != E; ++I)
10991107
{
11001108
Instruction* defi = &(*I);
1101-
PHINode* phi = dyn_cast<PHINode>(defi);
1109+
auto* phi = dyn_cast<PHINode>(defi);
11021110
if (!phi)
11031111
{
11041112
break;
@@ -1112,7 +1120,7 @@ void WIAnalysisRunner::updatePHIDepAtJoin(BasicBlock* blk, BranchInfo* brInfo)
11121120
for (unsigned predIdx = 0; predIdx < phi->getNumOperands(); ++predIdx)
11131121
{
11141122
Value* srcVal = phi->getOperand(predIdx);
1115-
Instruction* defi = dyn_cast<Instruction>(srcVal);
1123+
auto* defi = dyn_cast<Instruction>(srcVal);
11161124
if (defi && brInfo->influence_region.count(defi->getParent()))
11171125
{
11181126
updateDepMap(phi, brDep);
@@ -1152,25 +1160,25 @@ void WIAnalysisRunner::updateDepMap(const Instruction* inst, WIAnalysis::WIDepen
11521160
Value::const_user_iterator e = inst->user_end();
11531161
for (; it != e; ++it)
11541162
{
1155-
m_pChangedNew->push_back(*it);
1163+
m_pChangedNew.push_back(*it);
11561164
}
1157-
if (const StoreInst * st = dyn_cast<StoreInst>(inst))
1165+
if (const auto * st = dyn_cast<StoreInst>(inst))
11581166
{
11591167
auto it = m_storeDepMap.find(st);
11601168
if (it != m_storeDepMap.end())
11611169
{
1162-
m_pChangedNew->push_back(it->second);
1170+
m_pChangedNew.push_back(it->second);
11631171
}
11641172
}
11651173

11661174
if (dep == WIAnalysis::RANDOM)
11671175
{
1168-
EOPCODE eopcode = GetOpCode((Instruction*)inst);
1176+
EOPCODE eopcode = GetOpCode(inst);
11691177
if (eopcode == llvm_insert)
11701178
{
11711179
updateInsertElements((const InsertElementInst*)inst);
11721180
}
1173-
else if (const InsertValueInst* IVI = dyn_cast<const InsertValueInst>(inst))
1181+
else if (const auto* IVI = dyn_cast<const InsertValueInst>(inst))
11741182
{
11751183
updateInsertValues(IVI);
11761184
}
@@ -1181,8 +1189,8 @@ void WIAnalysisRunner::updateDepMap(const Instruction* inst, WIAnalysis::WIDepen
11811189
void WIAnalysisRunner::updateInsertElements(const InsertElementInst* inst)
11821190
{
11831191
/// find the first one in the sequence
1184-
InsertElementInst* curInst = (InsertElementInst*)inst;
1185-
InsertElementInst* srcInst = dyn_cast<InsertElementInst>(curInst->getOperand(0));
1192+
auto* curInst = (InsertElementInst*)inst;
1193+
auto* srcInst = dyn_cast<InsertElementInst>(curInst->getOperand(0));
11861194
while (srcInst)
11871195
{
11881196
if (hasDependency(srcInst) && getDependency(srcInst) == WIAnalysis::RANDOM)
@@ -1197,7 +1205,7 @@ void WIAnalysisRunner::updateInsertElements(const InsertElementInst* inst)
11971205
Value::user_iterator e = curInst->user_end();
11981206
for (; it != e; ++it)
11991207
{
1200-
m_pChangedNew->push_back(*it);
1208+
m_pChangedNew.push_back(*it);
12011209
}
12021210
}
12031211
}
@@ -1215,7 +1223,7 @@ void WIAnalysisRunner::updateInsertValues(const InsertValueInst* Inst)
12151223
{
12161224
/// find the first one in the sequence
12171225
const InsertValueInst* pI = Inst;
1218-
const InsertValueInst* aI = dyn_cast<const InsertValueInst>(pI->getOperand(0));
1226+
const auto* aI = dyn_cast<const InsertValueInst>(pI->getOperand(0));
12191227
while (aI && aI->hasOneUse())
12201228
{
12211229
if (hasDependency(aI) && getDependency(aI) == WIAnalysis::RANDOM)
@@ -1230,7 +1238,7 @@ void WIAnalysisRunner::updateInsertValues(const InsertValueInst* Inst)
12301238
auto e = pI->user_end();
12311239
for (; it != e; ++it)
12321240
{
1233-
m_pChangedNew->push_back(*it);
1241+
m_pChangedNew.push_back(*it);
12341242
}
12351243
}
12361244
}
@@ -1386,7 +1394,7 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
13861394
GII_id = GII->getIntrinsicID();
13871395
}
13881396

1389-
const llvm::IntrinsicInst* llvmintrin = dyn_cast<llvm::IntrinsicInst>(inst);
1397+
const auto* llvmintrin = dyn_cast<llvm::IntrinsicInst>(inst);
13901398
if (llvmintrin != nullptr &&
13911399
(llvmintrin->getIntrinsicID() == llvm::Intrinsic::stacksave ||
13921400
llvmintrin->getIntrinsicID() == llvm::Intrinsic::stackrestore)) {

0 commit comments

Comments
 (0)