@@ -933,6 +933,8 @@ void FlowGraph::constructFlowGraph(INST_LIST& instlist)
933933 kernelInfo->updateExitBB (BBs.back ());
934934 }
935935
936+ markDivergentBBs ();
937+
936938 builder->materializeGlobalImm (getEntryBB ());
937939 normalizeRegionDescriptors ();
938940 localDataFlowAnalysis ();
@@ -2774,6 +2776,270 @@ void FlowGraph::markSimdBlocks(std::map<std::string, G4_BB*>& labelMap, FuncInfo
27742776 }
27752777}
27762778
2779+ // markDivergentBBs()
2780+ // If BB is on the divergent path, mark it as divergent.
2781+ // Divergent:
2782+ // If all active simd lanes on entry to shader/kernel are
2783+ // active in a BB, that BB is NOT divergent; otherwise,
2784+ // it is divergent.
2785+ // Note: this will be used to replace inSIMDCF gradually.
2786+ void FlowGraph::markDivergentBBs ()
2787+ {
2788+ // Assumption:
2789+ // 1. For each function, it has a single return (for function)
2790+ // or exit (for entry function). And that return/exit is the
2791+ // last BB of that function.
2792+ // 2. The entry function will appear first in the BB list, and
2793+ // if there is a call from A to B, A shall appear prior to B
2794+ // in BB list.
2795+ // 3. There is no indirect call, and no recursive call.
2796+ //
2797+ // Required: need to set BB id.
2798+ //
2799+ // Key variables:
2800+ // LastJoinBB:
2801+ // LastJoinBB is the fartherest joinBB of any goto/break/if/while
2802+ // so far, as described below in the algorithm.
2803+ // LastJoinBBId: Id(LastJoinBB).
2804+ //
2805+ // The algorithm initializes LastJoinBBId to -1 and scans all BBs in order.
2806+ // It checks control-flow instructions to see if it diverges (has join).
2807+ // If so, the algorithm sets LastJoinBBId to be the larger one of its join BB
2808+ // and LastJoinBBId; For a non-negative LastJoinBBId, it means that there is
2809+ // an active join in that BB, and therefore, all BBs from the current BB to
2810+ // that LastJoinBB (LastJoinBB not included) will be in divergent path.
2811+ //
2812+ // The algorithm checks the following cases and their join BBs are:
2813+ // case 1: cf inst = goto
2814+ // <currBB> [(p)] goto L
2815+ // ...
2816+ // <joinBB> L:
2817+ // case 2: cf inst = if
2818+ // <currBB> if
2819+ // ...
2820+ // <joinBB> endif
2821+ //
2822+ // case 3: cf inst = break
2823+ // <currBB> break
2824+ // ...
2825+ // [(p)] while
2826+ // <joinBB>
2827+ // case 4:
2828+ // <currBB> L:
2829+ // ....
2830+ // [(p)] while/goto L
2831+ // <joinBB>
2832+ //
2833+ int LastJoinBBId;
2834+
2835+ auto pushJoin = [&](G4_BB* joinBB) {
2836+ LastJoinBBId = std::max (LastJoinBBId, (int )joinBB->getId ());
2837+ };
2838+ auto popJoinIfMatch = [&](G4_BB* BB) {
2839+ if ((int )BB->getId () == LastJoinBBId) {
2840+ LastJoinBBId = -1 ;
2841+ }
2842+ };
2843+
2844+ if (BBs.empty ())
2845+ {
2846+ // Sanity check
2847+ return ;
2848+ }
2849+
2850+ reassignBlockIDs ();
2851+
2852+ // Analyze function in topological order. As there is no recursion
2853+ // and no indirect call, a function will be analyzed only if all
2854+ // its callers have been analyzed.
2855+ //
2856+ // If no subroutine, sortedFuncTable is empty. Here keep all functions
2857+ // in a vector first (it works with and without subroutines), then scan
2858+ // functions in topological order.
2859+ struct StartEndIter {
2860+ BB_LIST_ITER StartI;
2861+ BB_LIST_ITER EndI;
2862+ bool InvokedFromDivergentBB;
2863+ };
2864+ int numFuncs = (int )sortedFuncTable.size ();
2865+ std::vector<StartEndIter> allFuncs;
2866+ std::unordered_map<FuncInfo*, uint32_t > funcInfoIndex;
2867+ if (numFuncs == 0 )
2868+ {
2869+ numFuncs = 1 ;
2870+ allFuncs.resize (1 );
2871+ allFuncs[0 ].StartI = BBs.begin ();
2872+ allFuncs[0 ].EndI = BBs.end ();
2873+ allFuncs[0 ].InvokedFromDivergentBB = false ;
2874+ }
2875+ else
2876+ {
2877+ allFuncs.resize (numFuncs);
2878+ for (int i = numFuncs; i > 0 ; --i)
2879+ {
2880+ uint32_t ix = (uint32_t )(i - 1 );
2881+ FuncInfo* pFInfo = sortedFuncTable[ix];
2882+ G4_BB* StartBB = pFInfo->getInitBB ();
2883+ G4_BB* EndBB = pFInfo->getExitBB ();
2884+ uint32_t ui = (uint32_t )(numFuncs - i);
2885+ allFuncs[ui].StartI = std::find (BBs.begin (), BBs.end (), StartBB);
2886+ auto nextI = std::find (BBs.begin (), BBs.end (), EndBB);
2887+ assert (nextI != BBs.end () && " ICE: subroutine's end BB not found!" );
2888+ allFuncs[ui].EndI = (++nextI);
2889+ allFuncs[ui].InvokedFromDivergentBB = false ;
2890+
2891+ funcInfoIndex[pFInfo] = ui;
2892+ }
2893+ }
2894+
2895+ for (int i = 0 ; i < numFuncs; ++i)
2896+ {
2897+ // each function: [IT, IE)
2898+ BB_LIST_ITER& IT = allFuncs[i].StartI ;
2899+ BB_LIST_ITER& IE = allFuncs[i].EndI ;
2900+ if (IT == IE)
2901+ {
2902+ // sanity check
2903+ continue ;
2904+ }
2905+
2906+ if (allFuncs[i].InvokedFromDivergentBB )
2907+ {
2908+ // subroutine's divergent on entry. Mark every BB as divergent
2909+ for (; IT != IE; ++IT)
2910+ {
2911+ G4_BB* BB = *IT;
2912+
2913+ BB->setDivergent (true );
2914+ // set InSIMDFlow as well, will merge two gradually
2915+ BB->setInSimdFlow (true );
2916+
2917+ if (BB->size () == 0 )
2918+ {
2919+ // sanity check
2920+ continue ;
2921+ }
2922+ if (BB->isEndWithCall () || BB->isEndWithFCall ())
2923+ {
2924+ FuncInfo* calleeFunc = BB->getCalleeInfo ();
2925+ if (funcInfoIndex.count (calleeFunc))
2926+ {
2927+ int ix = funcInfoIndex[calleeFunc];
2928+ allFuncs[ix].InvokedFromDivergentBB = true ;
2929+ }
2930+ }
2931+ }
2932+ // continue for next func
2933+ continue ;
2934+ }
2935+
2936+ LastJoinBBId = -1 ;
2937+ for (; IT != IE; ++IT)
2938+ {
2939+ G4_BB* BB = *IT;
2940+
2941+ // This handles cases in which BB has endif/while/join as well as others
2942+ // so we don't need to explicitly check whether BB has endif/while/join, etc.
2943+ popJoinIfMatch (BB);
2944+
2945+ // Handle loop
2946+ for (auto iter = BB->Preds .begin (), iterEnd = BB->Preds .end (); iter != iterEnd; ++iter)
2947+ {
2948+ G4_BB* predBB = *iter;
2949+ if (predBB->getId () < BB->getId ())
2950+ continue ;
2951+
2952+ BB_LIST_ITER LoopIterEnd = std::find (BBs.begin (), BBs.end (), predBB);
2953+
2954+ // joinBB is the BB right after backward-goto/while
2955+ BB_LIST_ITER loopJoinIter = LoopIterEnd;
2956+ ++loopJoinIter;
2957+ assert (loopJoinIter != BBs.end () && " ICE: missing join BB!" );
2958+ G4_BB* joinBB = *loopJoinIter;
2959+
2960+ // Scan loop to find any out-of-loop branch, set join if any
2961+ for (auto LoopIter = IT; LoopIter != LoopIterEnd; ++LoopIter)
2962+ {
2963+ G4_BB* BB1 = *LoopIter;
2964+ if (BB1->size () == 0 )
2965+ {
2966+ continue ;
2967+ }
2968+ G4_INST* lastInst = BB1->back ();
2969+ if ((lastInst->opcode () == G4_break || lastInst->opcode () == G4_goto) &&
2970+ !lastInst->asCFInst ()->isUniform ())
2971+ {
2972+ G4_BB* newJoinBB = joinBB;
2973+ if (lastInst->opcode () == G4_goto)
2974+ {
2975+ G4_BB* targetBB = BB1->Succs .back ();
2976+ if (targetBB->getId () <= predBB->getId ())
2977+ {
2978+ continue ;
2979+ }
2980+ newJoinBB = targetBB;
2981+ }
2982+
2983+ // Need to find a 1st join, the other joins will
2984+ // be examined within the outer loop.
2985+ pushJoin (newJoinBB);
2986+ break ;
2987+ }
2988+ }
2989+
2990+ G4_INST* lastInst = predBB->back ();
2991+ if (!lastInst->asCFInst ()->isUniform ())
2992+ {
2993+ pushJoin (joinBB);
2994+ }
2995+ }
2996+
2997+ if ((int )BB->getId () < LastJoinBBId) {
2998+ BB->setDivergent (true );
2999+ // set InSIMDFlow as well, will merge these two fields gradually
3000+ BB->setInSimdFlow (true );
3001+ }
3002+
3003+ if (BB->size () == 0 )
3004+ {
3005+ continue ;
3006+ }
3007+
3008+ G4_INST* lastInst = BB->back ();
3009+ if (((lastInst->opcode () == G4_goto && !lastInst->asCFInst ()->isBackward ()) ||
3010+ lastInst->opcode () == G4_break) && !lastInst->asCFInst ()->isUniform ())
3011+ {
3012+ // forward goto/break : the last Succ BB is our target BB
3013+ // For break, it should be the BB right after while inst.
3014+ G4_BB* joinBB = BB->Succs .back ();
3015+ pushJoin (joinBB);
3016+ }
3017+ else if (lastInst->opcode () == G4_if && !lastInst->asCFInst ()->isUniform ())
3018+ {
3019+ G4_Label* labelInst = lastInst->asCFInst ()->getUip ();
3020+ G4_BB* joinBB = findLabelBB (IT, IE, labelInst->getLabel ());
3021+ assert (joinBB && " ICE(vISA) : missing endif label!" );
3022+ pushJoin (joinBB);
3023+ }
3024+ else if (lastInst->opcode () == G4_call || lastInst->opcode () == G4_pseudo_fcall)
3025+ {
3026+ // If this function is already in divergent branch, the callee
3027+ // must be in a divergent branch!.
3028+ if (BB->isDivergent () || lastInst->getPredicate () != nullptr )
3029+ {
3030+ FuncInfo* calleeFunc = BB->getCalleeInfo ();
3031+ if (funcInfoIndex.count (calleeFunc))
3032+ {
3033+ int ix = funcInfoIndex[calleeFunc];
3034+ allFuncs[ix].InvokedFromDivergentBB = true ;
3035+ }
3036+ }
3037+ }
3038+ }
3039+ }
3040+ return ;
3041+ }
3042+
27773043/*
27783044* Insert a join at the beginning of this basic block, immediately after the label
27793045* If a join is already present, nothing will be done
0 commit comments