@@ -46,6 +46,79 @@ SPDX-License-Identifier: MIT
4646
4747using namespace vISA ;
4848
49+ //
50+ // Helper class for processGoto to merge join's execution masks.
51+ // For example,
52+ // (p1) goto (8|M8) label
53+ // ....
54+ // (p2) goto (4|M4) label
55+ // ....
56+ // label:
57+ // join (16|M0)
58+ // Merge( (8|M8) and (4|M4)) will be (16|M0)!
59+ //
60+ // Normally, we don't see this kind of code. But vISA may generate a macro sequence
61+ // like the following, and we have to match the join's execMask to all of its gotos. We
62+ // do so by tracking the execution mask (execSize + mask offset).
63+ //
64+ // (p) goto (8|M8) L
65+ // ......
66+ // L:
67+ // join (8|M8) // not join (8|M0)
68+ //
69+ class ExecMaskInfo
70+ {
71+ uint8_t ExecSize; // 1|2|4|8|16|32
72+ uint8_t MaskOffset; // 0|4|8|12|16|20|24|28
73+
74+ void mergeEM (ExecMaskInfo& aEM)
75+ {
76+ // The new execMask should cover at least [left, right)
77+ const uint32_t left = std::min (MaskOffset, aEM.getMaskOffset ());
78+ const uint32_t right = std::max (MaskOffset + ExecSize, aEM.getMaskOffset () + aEM.getExecSize ());
79+ // Divide 32 channels into 8 quarters
80+ uint32_t lowQuarter = left / 4 ;
81+ uint32_t highQuarter = (right - 1 ) / 4 ;
82+ if (lowQuarter < 4 && highQuarter >= 4 )
83+ {
84+ // (32, M0)
85+ ExecSize = 32 ;
86+ MaskOffset = 0 ;
87+ }
88+ else if (lowQuarter < 2 && highQuarter >= 2 )
89+ {
90+ // (16, M0|M16)
91+ ExecSize = 16 ;
92+ MaskOffset = 0 ;
93+ }
94+ else if (lowQuarter < 6 && highQuarter >= 6 )
95+ {
96+ // (16, M16)
97+ ExecSize = 16 ;
98+ MaskOffset = 16 ;
99+ }
100+ // at this time, the range resides in one of [Q0,Q1], [Q2,Q3], [Q4,Q5], and [Q6,Q7].
101+ else
102+ {
103+ // (4|8, ...)
104+ ExecSize = (lowQuarter != highQuarter ? 8 : 4 );
105+ MaskOffset = left;
106+ }
107+ }
108+
109+ public:
110+ ExecMaskInfo () : ExecSize(0 ), MaskOffset(0 ) {};
111+ ExecMaskInfo (uint8_t aE, uint8_t aM) : ExecSize(aE), MaskOffset(aM) {}
112+
113+ uint8_t getExecSize () const { return ExecSize; }
114+ uint8_t getMaskOffset () const { return MaskOffset; }
115+
116+ void mergeExecMask (G4_ExecSize aExSize, uint8_t aMaskOffset)
117+ {
118+ ExecMaskInfo anotherEM{ aExSize, aMaskOffset };
119+ mergeEM (anotherEM);
120+ }
121+ };
49122
50123void GlobalOpndHashTable::HashNode::insert (uint16_t newLB, uint16_t newRB)
51124{
@@ -3039,9 +3112,10 @@ G4_BB* FlowGraph::getUniqueReturnBlock()
30393112
30403113/*
30413114* Insert a join at the beginning of this basic block, immediately after the label
3042- * If a join is already present, nothing will be done
3115+ * If a join is already present, make sure the join will cover the given 'execSize' and
3116+ * 'maskOffset'.
30433117*/
3044- void FlowGraph::insertJoinToBB (G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
3118+ void FlowGraph::insertJoinToBB (G4_BB* bb, G4_ExecSize execSize, G4_Label* jip, uint8_t maskOffset )
30453119{
30463120 MUST_BE_TRUE (bb->size () > 0 , " empty block" );
30473121 INST_LIST_ITER iter = bb->begin ();
@@ -3055,7 +3129,8 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
30553129 if (iter == bb->end ())
30563130 {
30573131 // insert join at the end
3058- G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , InstOpt_NoOpt);
3132+ G4_InstOption instMask = G4_INST::offsetToMask (execSize, maskOffset, builder->hasNibCtrl ());
3133+ G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , instMask);
30593134 bb->push_back (jInst, false );
30603135 }
30613136 else
@@ -3064,22 +3139,34 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip)
30643139
30653140 if (secondInst->opcode () == G4_join)
30663141 {
3067- if (execSize > secondInst->getExecSize ())
3142+ G4_ExecSize origExSize = secondInst->getExecSize ();
3143+ uint8_t origMaskOffset = (uint8_t )secondInst->getMaskOffset ();
3144+ ExecMaskInfo joinEM{ origExSize, origMaskOffset };
3145+ joinEM.mergeExecMask (execSize, maskOffset);
3146+ if (joinEM.getExecSize () > origExSize)
3147+ {
3148+ secondInst->setExecSize (G4_ExecSize{ joinEM.getExecSize () });
3149+ }
3150+ if (joinEM.getMaskOffset () != origMaskOffset)
30683151 {
3069- secondInst->setExecSize (execSize);
3152+ G4_InstOption nMask =
3153+ G4_INST::offsetToMask (joinEM.getExecSize (), joinEM.getMaskOffset (), builder->hasNibCtrl ());
3154+ secondInst->setMaskOption (nMask);
30703155 }
30713156 }
30723157 else
30733158 {
3074- G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , InstOpt_NoOpt);
3159+ G4_InstOption instMask = G4_INST::offsetToMask (execSize, maskOffset, builder->hasNibCtrl ());
3160+ G4_INST* jInst = builder->createInternalCFInst (NULL , G4_join, execSize, jip, NULL , instMask);
30753161 bb->insertBefore (iter, jInst, false );
30763162 }
30773163 }
30783164}
30793165
3080- typedef std::pair<G4_BB*, G4_ExecSize> BlockSizePair;
3166+ // For tracking execMask information of join.
3167+ typedef std::pair<G4_BB*, ExecMaskInfo> BlockSizePair;
30813168
3082- static void addBBToActiveJoinList (std::list<BlockSizePair>& activeJoinBlocks, G4_BB* bb, G4_ExecSize execSize)
3169+ static void addBBToActiveJoinList (std::list<BlockSizePair>& activeJoinBlocks, G4_BB* bb, G4_ExecSize execSize, uint8_t maskOff )
30833170{
30843171 // add goto target to list of active blocks that need a join
30853172 std::list<BlockSizePair>::iterator listIter;
@@ -3089,22 +3176,20 @@ static void addBBToActiveJoinList(std::list<BlockSizePair>& activeJoinBlocks, G4
30893176 if (aBB->getId () == bb->getId ())
30903177 {
30913178 // block already in list, update exec size if necessary
3092- if (execSize > (*listIter).second )
3093- {
3094- (*listIter).second = execSize;
3095- }
3179+ ExecMaskInfo& EM = (*listIter).second ;
3180+ EM.mergeExecMask (execSize, maskOff);
30963181 break ;
30973182 }
30983183 else if (aBB->getId () > bb->getId ())
30993184 {
3100- activeJoinBlocks.insert (listIter, BlockSizePair (bb, execSize));
3185+ ( void ) activeJoinBlocks.insert (listIter, BlockSizePair (bb, ExecMaskInfo ( execSize, maskOff) ));
31013186 break ;
31023187 }
31033188 }
31043189
31053190 if (listIter == activeJoinBlocks.end ())
31063191 {
3107- activeJoinBlocks.push_back (BlockSizePair (bb, execSize));
3192+ activeJoinBlocks.push_back (BlockSizePair (bb, ExecMaskInfo ( execSize, maskOff) ));
31083193 }
31093194}
31103195
@@ -3373,7 +3458,9 @@ void FlowGraph::processGoto(bool HasSIMDCF)
33733458 {
33743459 // This block is the target of one or more forward goto,
33753460 // or the fall-thru of a backward goto, needs to insert a join
3376- G4_ExecSize execSize = activeJoinBlocks.front ().second ;
3461+ ExecMaskInfo& EM = activeJoinBlocks.front ().second ;
3462+ uint8_t eSize = EM.getExecSize ();
3463+ uint8_t mOff = EM.getMaskOffset ();
33773464 G4_Label* joinJIP = NULL ;
33783465
33793466 activeJoinBlocks.pop_front ();
@@ -3384,7 +3471,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
33843471 joinJIP = joinBlock->getLabel ();
33853472 }
33863473
3387- insertJoinToBB (bb, execSize , joinJIP);
3474+ insertJoinToBB (bb, G4_ExecSize{eSize} , joinJIP, mOff );
33883475 }
33893476 }
33903477
@@ -3425,7 +3512,8 @@ void FlowGraph::processGoto(bool HasSIMDCF)
34253512 // join) within the loop body will has its JIP set to this join.
34263513 if (G4_BB* afterLoopJoinBB = getEarliestJmpOutBB (activeJoinBlocks, bb, predBB))
34273514 {
3428- addBBToActiveJoinList (activeJoinBlocks, afterLoopJoinBB, eSize);
3515+ // conservatively use maskoffset = 0.
3516+ addBBToActiveJoinList (activeJoinBlocks, afterLoopJoinBB, eSize, 0 );
34293517 }
34303518 }
34313519 else
@@ -3439,7 +3527,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
34393527 // add join to the fall-thru BB
34403528 if (G4_BB* fallThruBB = predBB->getPhysicalSucc ())
34413529 {
3442- addBBToActiveJoinList (activeJoinBlocks, fallThruBB, eSize);
3530+ addBBToActiveJoinList (activeJoinBlocks, fallThruBB, eSize, ( uint8_t )lastInst-> getMaskOffset () );
34433531 lastInst->asCFInst ()->setJip (fallThruBB->getLabel ());
34443532 }
34453533 }
@@ -3466,7 +3554,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
34663554 // set goto JIP to the first active block
34673555 G4_ExecSize eSize = lastInst->getExecSize () > g4::SIMD1 ?
34683556 lastInst->getExecSize () : pKernel->getSimdSize ();
3469- addBBToActiveJoinList (activeJoinBlocks, gotoTargetBB, eSize);
3557+ addBBToActiveJoinList (activeJoinBlocks, gotoTargetBB, eSize, ( uint8_t )lastInst-> getMaskOffset () );
34703558 G4_BB* joinBlock = activeJoinBlocks.front ().first ;
34713559 if (lastInst->getExecSize () == g4::SIMD1)
34723560 { // For simd1 goto, convert it to a goto with the right execSize.
0 commit comments