Skip to content

Commit d420269

Browse files
stefan-iligcbot
authored andcommitted
Revert recursion in isRegionInvariant
Revert change due to performance regression.
1 parent 9d93442 commit d420269

File tree

2 files changed

+77
-79
lines changed

2 files changed

+77
-79
lines changed

IGC/Compiler/CISACodeGen/WIAnalysis.cpp

Lines changed: 67 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ SPDX-License-Identifier: MIT
1616
#include "common/igc_regkeys.hpp"
1717
#include "GenISAIntrinsics/GenIntrinsicInst.h"
1818
#include "common/LLVMWarningsPush.hpp"
19-
#include <llvm/ADT/SmallVector.h>
2019
#include <llvm/IR/Function.h>
2120
#include <llvm/IR/CFG.h>
2221
#include <llvm/Support/CommandLine.h>
@@ -26,7 +25,6 @@ SPDX-License-Identifier: MIT
2625

2726
#include <string>
2827
#include <sstream>
29-
#include <cstdint>
3028
#include "Probe/Assertion.h"
3129

3230
using namespace llvm;
@@ -311,7 +309,10 @@ bool WIAnalysisRunner::run()
311309
m_depMap.Initialize(m_TT);
312310
m_TT->RegisterListener(&m_depMap);
313311

314-
m_pChangedNew.clear();
312+
m_changed1.clear();
313+
m_changed2.clear();
314+
m_pChangedNew = &m_changed1;
315+
m_pChangedOld = &m_changed2;
315316
m_ctrlBranches.clear();
316317

317318
m_storeDepMap.clear();
@@ -325,10 +326,13 @@ bool WIAnalysisRunner::run()
325326
// Compute the first iteration of the WI-dep according to ordering
326327
// instructions this ordering is generally good (as it ususally correlates
327328
// well with dominance).
328-
for (auto& inst: llvm::instructions(F))
329+
inst_iterator it = inst_begin(F);
330+
inst_iterator e = inst_end(F);
331+
for (; it != e; ++it)
329332
{
330-
calculate_dep(&inst);
333+
calculate_dep(&*it);
331334
}
335+
332336
// Recursively check if WI-dep changes and if so reclaculates
333337
// the WI-dep and marks the users for re-checking.
334338
// This procedure is guranteed to converge since WI-dep can only
@@ -348,13 +352,13 @@ bool WIAnalysisRunner::run()
348352
const Value* use = (*UI);
349353
if (!visited.count(use) && use->getType() == V->getType())
350354
{
351-
if (auto* INS = dyn_cast<InsertElementInst>(use))
355+
if (auto INS = dyn_cast<InsertElementInst>(use))
352356
{
353357
if (!isUniform(use))
354358
m_depMap.SetAttribute(INS, WIAnalysis::UNIFORM_THREAD);
355359
m_forcedUniforms.push_back(use);
356360
}
357-
else if (auto* PHI = dyn_cast<PHINode>(use))
361+
else if (auto PHI = dyn_cast<PHINode>(use))
358362
{
359363
if (!isUniform(use))
360364
m_depMap.SetAttribute(PHI, WIAnalysis::UNIFORM_THREAD);
@@ -396,22 +400,23 @@ bool WIAnalysis::runOnFunction(Function& F)
396400

397401
void WIAnalysisRunner::updateDeps()
398402
{
399-
std::vector<const llvm::Value*> m_pChangedOld;
400403
// As lonst as we have values to update
401-
while (!m_pChangedNew.empty())
404+
while (!m_pChangedNew->empty())
402405
{
403406
// swap between changedSet pointers - recheck the newChanged(now old)
404407
std::swap(m_pChangedNew, m_pChangedOld);
405408
// clear the newChanged set so it will be filled with the users of
406-
// instruction which their WI-dep changed during the current iteration
407-
m_pChangedNew.clear();
409+
// instruction which their WI-dep canged during the current iteration
410+
m_pChangedNew->clear();
408411

409412
// update all changed values
410-
for (const auto* val: m_pChangedOld)
413+
std::vector<const Value*>::iterator it = m_pChangedOld->begin();
414+
std::vector<const Value*>::iterator e = m_pChangedOld->end();
415+
for (; it != e; ++it)
411416
{
412417
// remove first instruction
413418
// calculate its new dependencey value
414-
calculate_dep(val);
419+
calculate_dep(*it);
415420
}
416421
}
417422
}
@@ -432,7 +437,7 @@ bool WIAnalysisRunner::isInstructionSimple(const Instruction* inst)
432437
{
433438
return true;
434439
}
435-
if (IsMathIntrinsic(GetOpCode(inst)))
440+
if (IsMathIntrinsic(GetOpCode((Instruction*)inst)))
436441
{
437442
return true;
438443
}
@@ -602,7 +607,7 @@ bool WIAnalysis::insideWorkgroupDivergentCF(const Value* val) const
602607

603608
WIAnalysis::WIDependancy WIAnalysisRunner::whichDepend(const Value* val) const
604609
{
605-
IGC_ASSERT_MESSAGE(m_pChangedNew.empty(), "set should be empty before query");
610+
IGC_ASSERT_MESSAGE(m_pChangedNew->empty(), "set should be empty before query");
606611
IGC_ASSERT_MESSAGE(nullptr != val, "Bad value");
607612
if (isa<Constant>(val))
608613
{
@@ -724,7 +729,7 @@ void WIAnalysisRunner::calculate_dep(const Value* val)
724729
// is not uniform ?
725730
IGC_ASSERT_MESSAGE(isa<Instruction>(val), "Could we reach here with non instruction value?");
726731

727-
const auto* const inst = dyn_cast<Instruction>(val);
732+
const Instruction* const inst = dyn_cast<Instruction>(val);
728733
IGC_ASSERT_MESSAGE(nullptr != inst, "This Value is not an Instruction");
729734
if (inst)
730735
{
@@ -789,7 +794,7 @@ void WIAnalysisRunner::calculate_dep(const Value* val)
789794
// This code could be extended further depending on requirements.
790795
if (inst->getOpcode() == Instruction::AShr)
791796
{
792-
auto* op0 = dyn_cast<BinaryOperator>(inst->getOperand(0));
797+
BinaryOperator* op0 = dyn_cast<BinaryOperator>(inst->getOperand(0));
793798
if (op0 && op0->getOpcode() == Instruction::Add &&
794799
!hasDependency(op0->getOperand(1)))
795800
{
@@ -880,53 +885,40 @@ void WIAnalysisRunner::calculate_dep(const Value* val)
880885
// divergent branch, trigger updates due to control-dependence
881886
if (inst->isTerminator() && dep != WIAnalysis::UNIFORM_GLOBAL)
882887
{
883-
update_cf_dep(cast<IGCLLVM::TerminatorInst>(inst));
888+
update_cf_dep(dyn_cast<IGCLLVM::TerminatorInst>(inst));
884889
}
885890
}
886891
}
887892
}
888893

889-
bool WIAnalysisRunner::isRegionInvariant(const llvm::Instruction* defi, BranchInfo* brInfo)
894+
bool WIAnalysisRunner::isRegionInvariant(const llvm::Instruction* defi, BranchInfo* brInfo, unsigned level)
890895
{
891-
constexpr uint8_t MAX_DEPTH = 4;
892-
struct RegionOperand{
893-
const llvm::Instruction* inst;
894-
uint8_t operandNum;
895-
};
896-
897-
llvm::SmallVector<RegionOperand, MAX_DEPTH> operands;
898-
operands.push_back({defi, 0});
899-
900-
while (!operands.empty())
896+
if (level >= 4)
897+
{
898+
return false;
899+
}
900+
if (isa<PHINode>(defi))
901+
{
902+
return false;
903+
}
904+
const unsigned nOps = defi->getNumOperands();
905+
for (unsigned i = 0; i < nOps; ++i)
901906
{
902-
auto& rop = operands.back();
903-
if (isa<PHINode>(rop.inst))
907+
Value* op = defi->getOperand(i);
908+
Instruction* srci = dyn_cast<Instruction>(op);
909+
if (srci)
904910
{
905-
return false;
906-
}
907-
908-
if (rop.inst->getNumOperands() < rop.operandNum) {
909-
Value* op = rop.inst->getOperand(rop.operandNum);
910-
rop.operandNum++;
911-
auto* srci = dyn_cast<Instruction>(op);
912-
if (srci)
911+
if (!brInfo->influence_region.count(srci->getParent()))
913912
{
914-
if (!brInfo->influence_region.count(srci->getParent()))
915-
{
916-
// go on to check the next operand
917-
continue;
918-
}
919-
if (operands.size() + 1 >= MAX_DEPTH)
920-
{
921-
return false;
922-
}
923-
operands.push_back({srci, 0});
913+
// go on to check the next operand
914+
continue;
915+
}
916+
else if (!isRegionInvariant(srci, brInfo, level + 1))
917+
{
918+
return false;
924919
}
925-
} else {
926-
operands.pop_back();
927920
}
928921
}
929-
930922
return true;
931923
}
932924

@@ -947,7 +939,7 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
947939
IGC_ASSERT(hasDependency(inst));
948940
WIBaseClass::WIDependancy instDep = getDependency(inst);
949941

950-
auto* blk = (BasicBlock*)(inst->getParent());
942+
BasicBlock* blk = (BasicBlock*)(inst->getParent());
951943
BasicBlock* ipd = PDT->getNode(blk)->getIDom()->getBlock();
952944
// a branch can have NULL immediate post-dominator when a function
953945
// has multiple exits in llvm-ir
@@ -985,13 +977,13 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
985977
IGC_ASSERT(!DT->isReachableFromEntry(PJ));
986978
continue;
987979
}
988-
auto* PJDom = PJNode->getIDom()->getBlock();
980+
auto PJDom = PJNode->getIDom()->getBlock();
989981

990982
// If both partial-join and it IDom are in partial-join region
991983
// there are cases in which phi-nodes in partial-joins are not
992984
// relevant to the cbr under the investigation
993-
auto* LoopA = LI->getLoopFor(PJDom);
994-
auto* LoopB = LI->getLoopFor(PJ);
985+
auto LoopA = LI->getLoopFor(PJDom);
986+
auto LoopB = LI->getLoopFor(PJ);
995987
if (br_info.partial_joins.count(PJDom))
996988
{
997989
// both PJ and its IDom are outside the CBR loop
@@ -1038,15 +1030,15 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
10381030
// because it might need to be RANDOM.
10391031
auto it = m_storeDepMap.find(st);
10401032
if (it != m_storeDepMap.end())
1041-
m_pChangedNew.push_back(it->second);
1033+
m_pChangedNew->push_back(it->second);
10421034
}
10431035

10441036
// This is an optimization that tries to detect instruction
10451037
// not really affected by control-flow divergency because
10461038
// all the sources are outside the region.
10471039
// However this is only as good as we can get because we
10481040
// only search limited depth
1049-
if (isRegionInvariant(defi, &br_info))
1041+
if (isRegionInvariant(defi, &br_info, 0))
10501042
{
10511043
continue;
10521044
}
@@ -1061,10 +1053,10 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
10611053
Value::use_iterator use_e = defi->use_end();
10621054
for (; use_it != use_e; ++use_it)
10631055
{
1064-
auto* user = dyn_cast<Instruction>((*use_it).getUser());
1056+
Instruction* user = dyn_cast<Instruction>((*use_it).getUser());
10651057
IGC_ASSERT(user);
10661058
BasicBlock* user_blk = user->getParent();
1067-
auto* phi = dyn_cast<PHINode>(user);
1059+
PHINode* phi = dyn_cast<PHINode>(user);
10681060
if (phi)
10691061
{
10701062
// another place we assume all critical edges have been
@@ -1076,8 +1068,8 @@ void WIAnalysisRunner::update_cf_dep(const IGCLLVM::TerminatorInst* inst)
10761068
// local def-use, not related to control-dependence
10771069
continue; // skip
10781070
}
1079-
auto* DefLoop = LI->getLoopFor(def_blk);
1080-
auto* UseLoop = LI->getLoopFor(user_blk);
1071+
auto DefLoop = LI->getLoopFor(def_blk);
1072+
auto UseLoop = LI->getLoopFor(user_blk);
10811073
if (user_blk == br_info.full_join ||
10821074
!br_info.influence_region.count(user_blk) ||
10831075
(br_info.partial_joins.count(user_blk) &&
@@ -1106,7 +1098,7 @@ void WIAnalysisRunner::updatePHIDepAtJoin(BasicBlock* blk, BranchInfo* brInfo)
11061098
for (BasicBlock::iterator I = blk->begin(), E = blk->end(); I != E; ++I)
11071099
{
11081100
Instruction* defi = &(*I);
1109-
auto* phi = dyn_cast<PHINode>(defi);
1101+
PHINode* phi = dyn_cast<PHINode>(defi);
11101102
if (!phi)
11111103
{
11121104
break;
@@ -1120,7 +1112,7 @@ void WIAnalysisRunner::updatePHIDepAtJoin(BasicBlock* blk, BranchInfo* brInfo)
11201112
for (unsigned predIdx = 0; predIdx < phi->getNumOperands(); ++predIdx)
11211113
{
11221114
Value* srcVal = phi->getOperand(predIdx);
1123-
auto* defi = dyn_cast<Instruction>(srcVal);
1115+
Instruction* defi = dyn_cast<Instruction>(srcVal);
11241116
if (defi && brInfo->influence_region.count(defi->getParent()))
11251117
{
11261118
updateDepMap(phi, brDep);
@@ -1160,25 +1152,25 @@ void WIAnalysisRunner::updateDepMap(const Instruction* inst, WIAnalysis::WIDepen
11601152
Value::const_user_iterator e = inst->user_end();
11611153
for (; it != e; ++it)
11621154
{
1163-
m_pChangedNew.push_back(*it);
1155+
m_pChangedNew->push_back(*it);
11641156
}
1165-
if (const auto * st = dyn_cast<StoreInst>(inst))
1157+
if (const StoreInst * st = dyn_cast<StoreInst>(inst))
11661158
{
11671159
auto it = m_storeDepMap.find(st);
11681160
if (it != m_storeDepMap.end())
11691161
{
1170-
m_pChangedNew.push_back(it->second);
1162+
m_pChangedNew->push_back(it->second);
11711163
}
11721164
}
11731165

11741166
if (dep == WIAnalysis::RANDOM)
11751167
{
1176-
EOPCODE eopcode = GetOpCode(inst);
1168+
EOPCODE eopcode = GetOpCode((Instruction*)inst);
11771169
if (eopcode == llvm_insert)
11781170
{
11791171
updateInsertElements((const InsertElementInst*)inst);
11801172
}
1181-
else if (const auto* IVI = dyn_cast<const InsertValueInst>(inst))
1173+
else if (const InsertValueInst* IVI = dyn_cast<const InsertValueInst>(inst))
11821174
{
11831175
updateInsertValues(IVI);
11841176
}
@@ -1189,8 +1181,8 @@ void WIAnalysisRunner::updateDepMap(const Instruction* inst, WIAnalysis::WIDepen
11891181
void WIAnalysisRunner::updateInsertElements(const InsertElementInst* inst)
11901182
{
11911183
/// find the first one in the sequence
1192-
auto* curInst = (InsertElementInst*)inst;
1193-
auto* srcInst = dyn_cast<InsertElementInst>(curInst->getOperand(0));
1184+
InsertElementInst* curInst = (InsertElementInst*)inst;
1185+
InsertElementInst* srcInst = dyn_cast<InsertElementInst>(curInst->getOperand(0));
11941186
while (srcInst)
11951187
{
11961188
if (hasDependency(srcInst) && getDependency(srcInst) == WIAnalysis::RANDOM)
@@ -1205,7 +1197,7 @@ void WIAnalysisRunner::updateInsertElements(const InsertElementInst* inst)
12051197
Value::user_iterator e = curInst->user_end();
12061198
for (; it != e; ++it)
12071199
{
1208-
m_pChangedNew.push_back(*it);
1200+
m_pChangedNew->push_back(*it);
12091201
}
12101202
}
12111203
}
@@ -1223,7 +1215,7 @@ void WIAnalysisRunner::updateInsertValues(const InsertValueInst* Inst)
12231215
{
12241216
/// find the first one in the sequence
12251217
const InsertValueInst* pI = Inst;
1226-
const auto* aI = dyn_cast<const InsertValueInst>(pI->getOperand(0));
1218+
const InsertValueInst* aI = dyn_cast<const InsertValueInst>(pI->getOperand(0));
12271219
while (aI && aI->hasOneUse())
12281220
{
12291221
if (hasDependency(aI) && getDependency(aI) == WIAnalysis::RANDOM)
@@ -1238,7 +1230,7 @@ void WIAnalysisRunner::updateInsertValues(const InsertValueInst* Inst)
12381230
auto e = pI->user_end();
12391231
for (; it != e; ++it)
12401232
{
1241-
m_pChangedNew.push_back(*it);
1233+
m_pChangedNew->push_back(*it);
12421234
}
12431235
}
12441236
}
@@ -1394,7 +1386,7 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
13941386
GII_id = GII->getIntrinsicID();
13951387
}
13961388

1397-
const auto* llvmintrin = dyn_cast<llvm::IntrinsicInst>(inst);
1389+
const llvm::IntrinsicInst* llvmintrin = dyn_cast<llvm::IntrinsicInst>(inst);
13981390
if (llvmintrin != nullptr &&
13991391
(llvmintrin->getIntrinsicID() == llvm::Intrinsic::stacksave ||
14001392
llvmintrin->getIntrinsicID() == llvm::Intrinsic::stackrestore)) {

0 commit comments

Comments
 (0)