@@ -298,6 +298,47 @@ G4_SrcRegRegion* HWConformity::insertCopyBefore(INST_LIST_ITER it, uint32_t srcN
298298 return newSrc;
299299}
300300
301+ G4_SrcRegRegion* HWConformity::insertCopyAtBBEntry (G4_BB *bb, uint8_t execSize, G4_Operand *src)
302+ {
303+ MUST_BE_TRUE (src != nullptr && src->isSrcRegRegion (), " source must be a SrcRegRegion" );
304+ G4_SrcRegRegion* origSrc = src->asSrcRegRegion ();
305+ auto lb = src->getLinearizedStart ();
306+ auto rb = src->getLinearizedEnd ();
307+
308+ unsigned int regNum = lb / G4_GRF_REG_NBYTES;
309+ unsigned int numRegs = (rb + G4_GRF_REG_NBYTES - 1 - lb) / G4_GRF_REG_NBYTES;
310+ if (regNum == -1 || numRegs == 0 )
311+ {
312+ return nullptr ;
313+ }
314+
315+ G4_Declare* dcl = builder.createTempVar (execSize, origSrc->getType (), GRFALIGN);
316+ dcl->getRegVar ()->setPhyReg (builder.phyregpool .getGreg (regNum), 0 );
317+ G4_SrcModifier modifier = origSrc->getModifier ();
318+ origSrc->setModifier (Mod_src_undef);
319+ G4_DstRegRegion* dst = builder.Create_Dst_Opnd_From_Dcl (dcl, 1 );
320+ dst->computePReg ();
321+
322+ G4_INST* movInst = builder.createMov (execSize, dst, origSrc, InstOpt_WriteEnable, false );
323+
324+ for (auto it = bb->begin ();
325+ it != bb->end ();
326+ it++)
327+ {
328+ if (!(*it)->isLabel ())
329+ {
330+ bb->insert (it, movInst);
331+ break ;
332+ }
333+ }
334+
335+ G4_SrcRegRegion* newSrc = builder.createSrcRegRegion (modifier, Direct, dcl->getRegVar (),
336+ 0 , 0 , execSize == 1 ? builder.getRegionScalar () : builder.getRegionStride1 (),
337+ dcl->getElemType ());
338+ newSrc->asSrcRegRegion ()->computePReg ();
339+ return newSrc;
340+ }
341+
301342/*
302343 * create a new mov instruction
303344 * mov (esize) tmp<1>:type src
@@ -4222,6 +4263,7 @@ struct AccInterval
42224263 int assignedAcc = -1 ;
42234264 int bundleConflictTimes = 0 ;
42244265 int bankConflictTimes = 0 ;
4266+ int suppressionTimes = 0 ;
42254267
42264268 AccInterval (G4_INST* inst_, int lastUse_, bool preAssigned = false ) :
42274269 inst (inst_), lastUse(lastUse_), isPreAssigned(preAssigned)
@@ -4543,6 +4585,31 @@ static bool replaceDstWithAcc(G4_INST* inst, int accNum, IR_Builder& builder)
45434585 return true ;
45444586}
45454587
4588+ static bool isAccCandidate (G4_INST* inst, Gen4_Operand_Number opndNum, G4_Kernel& kernel)
4589+
4590+ {
4591+ if (!kernel.fg .builder ->canMadHaveSrc0Acc ())
4592+ {
4593+ return false ;
4594+ }
4595+
4596+ switch (opndNum)
4597+ {
4598+ case Opnd_src0:
4599+ case Opnd_src1:
4600+ break ;
4601+ default :
4602+ return false ;
4603+ }
4604+
4605+ if (!inst->canSrcBeAcc (opndNum))
4606+ {
4607+ return false ;
4608+ }
4609+
4610+ return true ;
4611+ }
4612+
45464613struct AccAssignment
45474614{
45484615 std::vector<bool > freeAccs;
@@ -4677,13 +4744,15 @@ void HWConformity::multiAccSubstitution(G4_BB* bb)
46774744 bool mustBeAcc0 = false ;
46784745 int bundleBCTimes = 0 ;
46794746 int bankBCTimes = 0 ;
4747+ int readSuppressionSrcs = 0 ;
46804748 if (isAccCandidate (inst, kernel, lastUseId, mustBeAcc0))
46814749 {
46824750 // this is a potential candidate for acc substitution
46834751 AccInterval *newInterval = new AccInterval (inst, lastUseId);
46844752 newInterval->mustBeAcc0 = mustBeAcc0;
46854753 newInterval->bankConflictTimes = bankBCTimes;
46864754 newInterval->bundleConflictTimes = bundleBCTimes;
4755+ newInterval->suppressionTimes = readSuppressionSrcs;
46874756
46884757 intervals.push_back (newInterval);
46894758 }
@@ -4788,7 +4857,126 @@ void HWConformity::multiAccSubstitution(G4_BB* bb)
47884857 {
47894858 delete intervals[i];
47904859 }
4860+
4861+ return ;
4862+ }
4863+
4864+ struct LiveNode
4865+ {
4866+ G4_INST* Inst;
4867+ Gen4_Operand_Number OpNum;
4868+ LiveNode (G4_INST* Inst, Gen4_Operand_Number OpNum)
4869+ : Inst(Inst)
4870+ , OpNum(OpNum)
4871+ {
4872+ }
4873+ };
4874+
// Threshold used by HWConformity::localizeForAcc: a global declare is copied
// at block entry only when at least this many of its uses were recorded in
// the block.
4875+ #define GLOBAL_USE_NUM 15
4876+
4877+ static bool isSameOperand (G4_Operand *srcOpnd, struct LiveNode *ln)
4878+ {
4879+ G4_Operand *opnd = ln->Inst ->getOperand (ln->OpNum );
4880+
4881+ if (opnd->compareOperand (srcOpnd) == Rel_eq)
4882+ {
4883+ return true ;
4884+ }
4885+
4886+ return false ;
47914887}
4888+
4889+ // substitute local operands with acc when possible
4890+ void HWConformity::localizeForAcc (G4_BB* bb)
4891+ {
4892+ std::map<const G4_Declare*, G4_Operand*> replacedOperand;
4893+ std::unordered_map<const G4_Declare*, vector<struct LiveNode >> useNodes;
4894+ std::vector<const G4_Declare*> erasedCandidates;
4895+
4896+ for (auto instIter = bb->begin (), instEnd = bb->end (); instIter != instEnd; ++instIter)
4897+ {
4898+ G4_INST* inst = *instIter;
4899+
4900+ // Not defined in current BB
4901+ G4_Operand* dst = inst->getOperand (Opnd_dst);
4902+ if (dst && dst->isGreg () && kernel.fg .globalOpndHT .isOpndGlobal (dst))
4903+ {
4904+ const G4_Declare *dcl = dst->getTopDcl ();
4905+ if (useNodes.find (dcl) != useNodes.end ())
4906+ {
4907+ useNodes.erase (dcl);
4908+ erasedCandidates.emplace_back (dcl);
4909+ }
4910+ }
4911+
4912+ // Source operand
4913+ for (auto OpNum :
4914+ { Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
4915+ Gen4_Operand_Number::Opnd_src2})
4916+ {
4917+ G4_Operand* src = inst->getOperand (OpNum);
4918+ if (src && src->isGreg () && kernel.fg .globalOpndHT .isOpndGlobal (src))
4919+ {
4920+ const G4_Declare* dcl = src->getTopDcl ();
4921+ if ((OpNum != Opnd_src0 && // Acc can be used only for src0 and src1
4922+ OpNum != Opnd_src1) ||
4923+ !isAccCandidate (inst, OpNum, kernel)) // The operand is can be replaced with ACC
4924+ {
4925+ auto dclIter = std::find (erasedCandidates.begin (), erasedCandidates.end (), dcl);
4926+ if (dclIter == erasedCandidates.end ())
4927+ {
4928+ erasedCandidates.emplace_back (dcl);
4929+ }
4930+ }
4931+ else
4932+ {
4933+ if (useNodes[dcl].empty () ||
4934+ isSameOperand (src, &(useNodes[dcl][0 ])))
4935+ {
4936+ useNodes[dcl].emplace_back (inst, OpNum);
4937+ }
4938+ }
4939+ }
4940+ }
4941+ }
4942+
4943+ for (auto & Nodes : useNodes)
4944+ {
4945+ const G4_Declare* dcl = Nodes.first ;
4946+ auto dclIter = std::find (erasedCandidates.begin (), erasedCandidates.end (), dcl);
4947+ if (dclIter != erasedCandidates.end ())
4948+ {
4949+ continue ;
4950+ }
4951+ if (Nodes.second .size () >= GLOBAL_USE_NUM)
4952+ {
4953+ for (auto & LN : Nodes.second )
4954+ {
4955+ G4_INST* inst = LN.Inst ;
4956+ Gen4_Operand_Number opNum = LN.OpNum ;
4957+ int i = inst->getSrcNum (opNum);
4958+ G4_Operand* src = inst->getSrc (i);
4959+ G4_Operand* tmpOpnd = nullptr ;
4960+
4961+ auto itR = replacedOperand.find (dcl);
4962+ if (itR != replacedOperand.end ())
4963+ {
4964+ tmpOpnd = builder.duplicateOperand (itR->second );
4965+ }
4966+ else
4967+ {
4968+ tmpOpnd = insertCopyAtBBEntry (bb, inst->getExecSize (), src);
4969+ replacedOperand[dcl] = tmpOpnd;
4970+ }
4971+ inst->setSrc (tmpOpnd, i);
4972+ }
4973+ }
4974+ }
4975+
4976+ return ;
4977+ }
4978+
4979+
47924980// substitute local operands with acc when possible
47934981void HWConformity::accSubstitution (G4_BB* bb)
47944982{
0 commit comments