@@ -2694,34 +2694,44 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, uint8_t execSize, G4_Label* jip)
26942694 }
26952695}
26962696
2697- typedef std::pair<G4_BB*, int > BlockSizePair;
2697+ struct SJoinInfo {
2698+ SJoinInfo (G4_BB* B, uint16_t E, bool Nested = false ) :
2699+ BB (B), ExecSize(E), IsNestedJoin(Nested) {}
2700+ G4_BB* BB;
2701+ uint16_t ExecSize;
2702+ bool IsNestedJoin; // [HW WA] : join for a goto within a divergent BB.
2703+ };
26982704
2699- static void addBBToActiveJoinList (std::list<BlockSizePair >& activeJoinBlocks, G4_BB* bb, int execSize)
2705+ static void addBBToActiveJoinList (std::list<SJoinInfo >& activeJoinBlocks, G4_BB* bb, int execSize)
27002706{
27012707 // add goto target to list of active blocks that need a join
2702- std::list<BlockSizePair >::iterator listIter;
2708+ std::list<SJoinInfo >::iterator listIter;
27032709 for (listIter = activeJoinBlocks.begin (); listIter != activeJoinBlocks.end (); ++listIter)
27042710 {
2705- G4_BB* aBB = (*listIter).first ;
2711+ // If activeJoinBlocks isn't empty, this join should be considered as a nested join
2712+ SJoinInfo& jinfo = (*listIter);
2713+ G4_BB* aBB = jinfo.BB ;
27062714 if (aBB->getId () == bb->getId ())
27072715 {
27082716 // block already in list, update exec size if necessary
2709- if (execSize > (*listIter). second )
2717+ if (execSize > jinfo. ExecSize )
27102718 {
2711- (*listIter). second = execSize;
2719+ jinfo. ExecSize = execSize;
27122720 }
2721+ jinfo.IsNestedJoin = true ;
27132722 break ;
27142723 }
27152724 else if (aBB->getId () > bb->getId ())
27162725 {
2717- activeJoinBlocks.insert (listIter, BlockSizePair (bb, execSize));
2726+ activeJoinBlocks.insert (listIter, SJoinInfo (bb, execSize, true ));
27182727 break ;
27192728 }
27202729 }
27212730
27222731 if (listIter == activeJoinBlocks.end ())
27232732 {
2724- activeJoinBlocks.push_back (BlockSizePair (bb, execSize));
2733+ bool nested = activeJoinBlocks.empty () ? false : true ;
2734+ activeJoinBlocks.push_back (SJoinInfo (bb, execSize, nested));
27252735 }
27262736}
27272737
@@ -2850,7 +2860,7 @@ void FlowGraph::setJIPForEndif(G4_INST* endif, G4_INST* target, G4_BB* targetBB)
28502860void FlowGraph::processGoto (bool HasSIMDCF)
28512861{
28522862 // list of active blocks where a join needs to be inserted, sorted in lexical order
2853- std::list<BlockSizePair > activeJoinBlocks;
2863+ std::list<SJoinInfo > activeJoinBlocks;
28542864 bool doScalarJmp = !builder->noScalarJmp ();
28552865
28562866 for (BB_LIST_ITER it = BBs.begin (), itEnd = BBs.end (); it != itEnd; ++it)
@@ -2863,18 +2873,18 @@ void FlowGraph::processGoto(bool HasSIMDCF)
28632873
28642874 if (activeJoinBlocks.size () > 0 )
28652875 {
2866- if (bb == activeJoinBlocks.front ().first )
2876+ if (bb == activeJoinBlocks.front ().BB )
28672877 {
28682878 // This block is the target of one or more forward goto,
28692879 // or the fall-thru of a backward goto, needs to insert a join
2870- int execSize = activeJoinBlocks.front ().second ;
2880+ int execSize = activeJoinBlocks.front ().ExecSize ;
28712881 G4_Label* joinJIP = NULL ;
28722882
28732883 activeJoinBlocks.pop_front ();
28742884 if (activeJoinBlocks.size () > 0 )
28752885 {
28762886 // set join JIP to the next active join
2877- G4_BB* joinBlock = activeJoinBlocks.front ().first ;
2887+ G4_BB* joinBlock = activeJoinBlocks.front ().BB ;
28782888 joinJIP = joinBlock->getLabel ();
28792889 }
28802890
@@ -2950,6 +2960,25 @@ void FlowGraph::processGoto(bool HasSIMDCF)
29502960 bb->setInSimdFlow (true );
29512961 }
29522962
2963+ // [HW WA] set nested divergent branch.
2964+ // 1) [conservative] set it if it is divergent, but not necessarily nested, or
2965+ // 2) Set it if there are at least two active joins or one nested join.
2966+ if ((builder->getuint32Option (vISA_noMaskToAnyhWA) & 0x3 ) > 1 )
2967+ {
2968+ if (activeJoinBlocks.size () > 1 ||
2969+ (activeJoinBlocks.size () == 1 && activeJoinBlocks.back ().IsNestedJoin ))
2970+ {
2971+ bb->setInNestedDivergentBranch (true );
2972+ }
2973+ }
2974+ else if ((builder->getuint32Option (vISA_noMaskToAnyhWA) & 0x3 ) > 0 )
2975+ {
2976+ if (activeJoinBlocks.size () > 0 )
2977+ {
2978+ bb->setInNestedDivergentBranch (true );
2979+ }
2980+ }
2981+
29532982 G4_INST* lastInst = bb->back ();
29542983 if (lastInst->opcode () == G4_goto && !lastInst->asCFInst ()->isBackward ())
29552984 {
@@ -2959,7 +2988,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
29592988 bool isUniform = lastInst->getExecSize () == 1 || lastInst->getPredicate () == NULL ;
29602989
29612990 if (isUniform && doScalarJmp &&
2962- (activeJoinBlocks.size () == 0 || activeJoinBlocks.front ().first ->getId () > gotoTargetBB->getId ()))
2991+ (activeJoinBlocks.size () == 0 || activeJoinBlocks.front ().BB ->getId () > gotoTargetBB->getId ()))
29632992 {
29642993 // can convert goto into a scalar jump to UIP, if the jmp will not make us skip any joins
29652994 // CFG itself does not need to be updated
@@ -2970,7 +2999,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
29702999 // set goto JIP to the first active block
29713000 uint8_t eSize = lastInst->getExecSize () > 1 ? lastInst->getExecSize () : pKernel->getSimdSize ();
29723001 addBBToActiveJoinList (activeJoinBlocks, gotoTargetBB, eSize);
2973- G4_BB* joinBlock = activeJoinBlocks.front ().first ;
3002+ G4_BB* joinBlock = activeJoinBlocks.front ().BB ;
29743003 if (lastInst->getExecSize () == 1 )
29753004 { // For simd1 goto, convert it to a goto with the right execSize.
29763005 lastInst->setExecSize (eSize);
0 commit comments