Skip to content

Commit 2df9fa3

Browse files
jgu222sys_zuul
authored and committed
Added code to mark divergent BBs. The divergent info
is a separate field within BB for now; it will be used to replace the InSIMDFlow field. Change-Id: I68aba5d496314e4d776b447896f56ab2c4f7edb4
1 parent 4245b39 commit 2df9fa3

File tree

3 files changed

+280
-4
lines changed

3 files changed

+280
-4
lines changed

visa/FlowGraph.cpp

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,8 @@ void FlowGraph::constructFlowGraph(INST_LIST& instlist)
933933
kernelInfo->updateExitBB(BBs.back());
934934
}
935935

936+
markDivergentBBs();
937+
936938
builder->materializeGlobalImm(getEntryBB());
937939
normalizeRegionDescriptors();
938940
localDataFlowAnalysis();
@@ -2774,6 +2776,270 @@ void FlowGraph::markSimdBlocks(std::map<std::string, G4_BB*>& labelMap, FuncInfo
27742776
}
27752777
}
27762778

2779+
// markDivergentBBs()
2780+
// If BB is on the divergent path, mark it as divergent.
2781+
// Divergent:
2782+
// If all active simd lanes on entry to shader/kernel are
2783+
// active in a BB, that BB is NOT divergent; otherwise,
2784+
// it is divergent.
2785+
// Note: this will be used to replace inSIMDCF gradually.
2786+
void FlowGraph::markDivergentBBs()
2787+
{
2788+
// Assumption:
2789+
// 1. Each function has a single return (for a subroutine)
2790+
// or exit (for the entry function), and that return/exit is the
2791+
// last BB of that function.
2792+
// 2. The entry function will appear first in the BB list, and
2793+
// if there is a call from A to B, A shall appear prior to B
2794+
// in BB list.
2795+
// 3. There is no indirect call, and no recursive call.
2796+
//
2797+
// Required: need to set BB id.
2798+
//
2799+
// Key variables:
2800+
// LastJoinBB:
2801+
// LastJoinBB is the farthest joinBB of any goto/break/if/while
2802+
// so far, as described below in the algorithm.
2803+
// LastJoinBBId: Id(LastJoinBB).
2804+
//
2805+
// The algorithm initializes LastJoinBBId to -1 and scans all BBs in order.
2806+
// It checks each control-flow instruction to see if it diverges (has a join).
2807+
// If so, the algorithm sets LastJoinBBId to the larger of its join BB's id
2808+
// and LastJoinBBId. A non-negative LastJoinBBId means that there is
2809+
// an active join in that BB, and therefore all BBs from the current BB to
2810+
// that LastJoinBB (LastJoinBB not included) are on a divergent path.
2811+
//
2812+
// The algorithm checks the following cases and their join BBs are:
2813+
// case 1: cf inst = goto
2814+
// <currBB> [(p)] goto L
2815+
// ...
2816+
// <joinBB> L:
2817+
// case 2: cf inst = if
2818+
// <currBB> if
2819+
// ...
2820+
// <joinBB> endif
2821+
//
2822+
// case 3: cf inst = break
2823+
// <currBB> break
2824+
// ...
2825+
// [(p)] while
2826+
// <joinBB>
2827+
// case 4:
2828+
// <currBB> L:
2829+
// ....
2830+
// [(p)] while/goto L
2831+
// <joinBB>
2832+
//
2833+
int LastJoinBBId;
2834+
2835+
auto pushJoin = [&](G4_BB* joinBB) {
2836+
LastJoinBBId = std::max(LastJoinBBId, (int)joinBB->getId());
2837+
};
2838+
auto popJoinIfMatch = [&](G4_BB* BB) {
2839+
if ((int)BB->getId() == LastJoinBBId) {
2840+
LastJoinBBId = -1;
2841+
}
2842+
};
2843+
2844+
if (BBs.empty())
2845+
{
2846+
// Sanity check
2847+
return;
2848+
}
2849+
2850+
reassignBlockIDs();
2851+
2852+
// Analyze functions in topological order. As there is no recursion
2853+
// and no indirect call, a function will be analyzed only after all
2854+
// its callers have been analyzed.
2855+
//
2856+
// If there are no subroutines, sortedFuncTable is empty. Here we keep all functions
2857+
// in a vector first (it works with and without subroutines), then scan
2858+
// functions in topological order.
2859+
struct StartEndIter {
2860+
BB_LIST_ITER StartI;
2861+
BB_LIST_ITER EndI;
2862+
bool InvokedFromDivergentBB;
2863+
};
2864+
int numFuncs = (int)sortedFuncTable.size();
2865+
std::vector<StartEndIter> allFuncs;
2866+
std::unordered_map<FuncInfo*, uint32_t> funcInfoIndex;
2867+
if (numFuncs == 0)
2868+
{
2869+
numFuncs = 1;
2870+
allFuncs.resize(1);
2871+
allFuncs[0].StartI = BBs.begin();
2872+
allFuncs[0].EndI = BBs.end();
2873+
allFuncs[0].InvokedFromDivergentBB = false;
2874+
}
2875+
else
2876+
{
2877+
allFuncs.resize(numFuncs);
2878+
for (int i = numFuncs; i > 0; --i)
2879+
{
2880+
uint32_t ix = (uint32_t)(i - 1);
2881+
FuncInfo* pFInfo = sortedFuncTable[ix];
2882+
G4_BB* StartBB = pFInfo->getInitBB();
2883+
G4_BB* EndBB = pFInfo->getExitBB();
2884+
uint32_t ui = (uint32_t)(numFuncs - i);
2885+
allFuncs[ui].StartI = std::find(BBs.begin(), BBs.end(), StartBB);
2886+
auto nextI = std::find(BBs.begin(), BBs.end(), EndBB);
2887+
assert(nextI != BBs.end() && "ICE: subroutine's end BB not found!");
2888+
allFuncs[ui].EndI = (++nextI);
2889+
allFuncs[ui].InvokedFromDivergentBB = false;
2890+
2891+
funcInfoIndex[pFInfo] = ui;
2892+
}
2893+
}
2894+
2895+
for (int i = 0; i < numFuncs; ++i)
2896+
{
2897+
// each function: [IT, IE)
2898+
BB_LIST_ITER& IT = allFuncs[i].StartI;
2899+
BB_LIST_ITER& IE = allFuncs[i].EndI;
2900+
if (IT == IE)
2901+
{
2902+
// sanity check
2903+
continue;
2904+
}
2905+
2906+
if (allFuncs[i].InvokedFromDivergentBB)
2907+
{
2908+
// The subroutine is divergent on entry: mark every BB as divergent.
2909+
for (; IT != IE; ++IT)
2910+
{
2911+
G4_BB* BB = *IT;
2912+
2913+
BB->setDivergent(true);
2914+
// set InSIMDFlow as well, will merge two gradually
2915+
BB->setInSimdFlow(true);
2916+
2917+
if (BB->size() == 0)
2918+
{
2919+
// sanity check
2920+
continue;
2921+
}
2922+
if (BB->isEndWithCall() || BB->isEndWithFCall())
2923+
{
2924+
FuncInfo* calleeFunc = BB->getCalleeInfo();
2925+
if (funcInfoIndex.count(calleeFunc))
2926+
{
2927+
int ix = funcInfoIndex[calleeFunc];
2928+
allFuncs[ix].InvokedFromDivergentBB = true;
2929+
}
2930+
}
2931+
}
2932+
// continue for next func
2933+
continue;
2934+
}
2935+
2936+
LastJoinBBId = -1;
2937+
for (; IT != IE; ++IT)
2938+
{
2939+
G4_BB* BB = *IT;
2940+
2941+
// This handles cases in which BB has endif/while/join as well as others
2942+
// so we don't need to explicitly check whether BB has endif/while/join, etc.
2943+
popJoinIfMatch(BB);
2944+
2945+
// Handle loop
2946+
for (auto iter = BB->Preds.begin(), iterEnd = BB->Preds.end(); iter != iterEnd; ++iter)
2947+
{
2948+
G4_BB* predBB = *iter;
2949+
if (predBB->getId() < BB->getId())
2950+
continue;
2951+
2952+
BB_LIST_ITER LoopIterEnd = std::find(BBs.begin(), BBs.end(), predBB);
2953+
2954+
// joinBB is the BB right after backward-goto/while
2955+
BB_LIST_ITER loopJoinIter = LoopIterEnd;
2956+
++loopJoinIter;
2957+
assert(loopJoinIter != BBs.end() && "ICE: missing join BB!");
2958+
G4_BB* joinBB = *loopJoinIter;
2959+
2960+
// Scan loop to find any out-of-loop branch, set join if any
2961+
for (auto LoopIter = IT; LoopIter != LoopIterEnd; ++LoopIter)
2962+
{
2963+
G4_BB* BB1 = *LoopIter;
2964+
if (BB1->size() == 0)
2965+
{
2966+
continue;
2967+
}
2968+
G4_INST* lastInst = BB1->back();
2969+
if ((lastInst->opcode() == G4_break || lastInst->opcode() == G4_goto) &&
2970+
!lastInst->asCFInst()->isUniform())
2971+
{
2972+
G4_BB* newJoinBB = joinBB;
2973+
if (lastInst->opcode() == G4_goto)
2974+
{
2975+
G4_BB* targetBB = BB1->Succs.back();
2976+
if (targetBB->getId() <= predBB->getId())
2977+
{
2978+
continue;
2979+
}
2980+
newJoinBB = targetBB;
2981+
}
2982+
2983+
// Only the first join needs to be found here; the other joins will
2984+
// be examined within the outer loop.
2985+
pushJoin(newJoinBB);
2986+
break;
2987+
}
2988+
}
2989+
2990+
G4_INST* lastInst = predBB->back();
2991+
if (!lastInst->asCFInst()->isUniform())
2992+
{
2993+
pushJoin(joinBB);
2994+
}
2995+
}
2996+
2997+
if ((int)BB->getId() < LastJoinBBId) {
2998+
BB->setDivergent(true);
2999+
// set InSIMDFlow as well, will merge these two fields gradually
3000+
BB->setInSimdFlow(true);
3001+
}
3002+
3003+
if (BB->size() == 0)
3004+
{
3005+
continue;
3006+
}
3007+
3008+
G4_INST* lastInst = BB->back();
3009+
if (((lastInst->opcode() == G4_goto && !lastInst->asCFInst()->isBackward()) ||
3010+
lastInst->opcode() == G4_break) && !lastInst->asCFInst()->isUniform())
3011+
{
3012+
// forward goto/break : the last Succ BB is our target BB
3013+
// For break, it should be the BB right after while inst.
3014+
G4_BB* joinBB = BB->Succs.back();
3015+
pushJoin(joinBB);
3016+
}
3017+
else if (lastInst->opcode() == G4_if && !lastInst->asCFInst()->isUniform())
3018+
{
3019+
G4_Label* labelInst = lastInst->asCFInst()->getUip();
3020+
G4_BB* joinBB = findLabelBB(IT, IE, labelInst->getLabel());
3021+
assert(joinBB && "ICE(vISA) : missing endif label!");
3022+
pushJoin(joinBB);
3023+
}
3024+
else if (lastInst->opcode() == G4_call || lastInst->opcode() == G4_pseudo_fcall)
3025+
{
3026+
// If this function is already in a divergent branch, the callee
3027+
// must be in a divergent branch as well.
3028+
if (BB->isDivergent() || lastInst->getPredicate() != nullptr)
3029+
{
3030+
FuncInfo* calleeFunc = BB->getCalleeInfo();
3031+
if (funcInfoIndex.count(calleeFunc))
3032+
{
3033+
int ix = funcInfoIndex[calleeFunc];
3034+
allFuncs[ix].InvokedFromDivergentBB = true;
3035+
}
3036+
}
3037+
}
3038+
}
3039+
}
3040+
return;
3041+
}
3042+
27773043
/*
27783044
* Insert a join at the beginning of this basic block, immediately after the label
27793045
* If a join is already present, nothing will be done

visa/FlowGraph.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,12 @@ class G4_BB
289289
// if the block is under simd flow control
290290
bool inSimdFlow;
291291

292+
// If a BB is divergent, this field is set to true. Divergent means
293+
// that among all active lanes on entry to shader/kernel, some lanes are
294+
// (potentially) not active in this BB.
295+
// Note : this field will be used to replace inSimdFlow.
296+
bool divergent;
297+
292298
// the physical pred/succ for this block (i.e., the pred/succ for this block in the BB list)
293299
// Note that some transformations may rearrange BB layout, so for safety it's best to recompute
294300
// this
@@ -374,8 +380,8 @@ class G4_BB
374380
traversal(0), idom(NULL), beforeCall(NULL),
375381
afterCall(NULL), calleeInfo(NULL), BBType(G4_BB_NONE_TYPE),
376382
inNaturalLoop(false), hasSendInBB(false), loopNestLevel(0), scopeID(0),
377-
inSimdFlow(false), physicalPred(NULL), physicalSucc(NULL), parent(fg),
378-
instList(alloc)
383+
inSimdFlow(false), divergent(false), physicalPred(NULL), physicalSucc(NULL),
384+
parent(fg), instList(alloc)
379385
{
380386
}
381387

@@ -424,6 +430,8 @@ class G4_BB
424430
void resetNestLevel() { loopNestLevel = 0; }
425431
void setInSimdFlow(bool val) {inSimdFlow = val;}
426432
bool isInSimdFlow() {return inSimdFlow;}
433+
void setDivergent(bool val) { divergent = val; }
434+
bool isDivergent() const { return divergent; }
427435
unsigned getScopeID() { return scopeID; }
428436
void setScopeID(unsigned id) { scopeID = id; }
429437

@@ -1114,6 +1122,7 @@ class FlowGraph
11141122
void addFrameSetupDeclares(IR_Builder& builder, PhyRegPool& regPool);
11151123
void addSaveRestorePseudoDeclares(IR_Builder& builder);
11161124
void markSimdBlocks(std::map<std::string, G4_BB*>& labelMap, FuncInfoHashTable &FuncInfoMap);
1125+
void markDivergentBBs();
11171126

11181127
// Used for CISA 3.0
11191128
void incrementNumBBs() { numBBId++ ; }

visa/Gen4_IR.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,7 +1239,7 @@ class G4_InstCF : public G4_INST
12391239
bool isBackwardBr;
12401240

12411241
// True if this branch is uniform (all active lanes go to the same target)
1242-
// Valid for if/while/break/goto only. This could be encoded in instOpt
1242+
// Valid for if/while/break/goto/jmpi only. This could be encoded in instOpt
12431243
bool isUniformBr;
12441244

12451245
public:
@@ -1257,7 +1257,8 @@ class G4_InstCF : public G4_INST
12571257
G4_INST(builder, prd, op, nullptr, false, size, nullptr, nullptr, nullptr, instOpt),
12581258
jip(jipLabel), uip(uipLabel), isBackwardBr(false), isUniformBr(false)
12591259
{
1260-
isUniformBr = (op == G4_goto && (size == 1 || prd == nullptr));
1260+
isUniformBr = (op == G4_jmpi ||
1261+
(op == G4_goto && (size == 1 || prd == nullptr)));
12611262
}
12621263

12631264
// used by jump/call/ret

0 commit comments

Comments
 (0)