@@ -19,7 +19,7 @@ static bool regSortCompare(regMap map1, regMap map2) {
1919 return false ;
2020}
2121
22- static bool regSortCompareBeforeRA (regMapBRA map1, regMapBRA map2) {
22+ static bool regSortCompareAfterRA (regMapBRA map1, regMapBRA map2) {
2323 if (map1.opndNum < map2.opndNum ) {
2424 return true ;
2525 } else if (map1.opndNum > map2.opndNum ) {
@@ -430,7 +430,7 @@ void SRSubPass::SRSub(G4_BB *bb) {
430430
431431// Check if current instruction is the candidate of sendi.
432432// Recorded as candidate.
433- bool SRSubPassBeforeRA::isSRCandidateBeforeRA (G4_INST *inst,
433+ bool SRSubPassAfterRA::isSRCandidateAfterRA (G4_INST *inst,
434434 regCandidatesBRA &dstSrcRegs) {
435435 if (!inst->isSend ()) {
436436 return false ;
@@ -482,6 +482,7 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
482482 int movInstNum = 0 ;
483483 int32_t firstDefID = 0x7FFFFFFF ; // the ID of the first instruction define the
484484 std::vector<std::pair<Gen4_Operand_Number, unsigned >> notRemoveableMap;
485+ std::vector<G4_INST *> immMovs;
485486 for (auto I = inst->def_begin (), E = inst->def_end (); I != E; ++I) {
486487 auto &&def = *I;
487488
@@ -572,14 +573,64 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
572573
573574 // It's not global define
574575 if (!(builder.getIsKernel () && kernel.fg .getNumBB () == 1 )) {
575- if (kernel.fg .globalOpndHT .isOpndGlobal (dstRgn)) {
576+ if (kernel.fg .globalOpndHT .isOpndGlobal (dstRgn) && !dstRgn-> getTopDcl ()-> getIsBBLocal () ) {
576577 return false ;
577578 }
578579 }
579580
580581 return true ;
581582 };
582583
584+ // mov (16) r81.0<1>:f 0x8:f // $52:&54:
585+ // mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
586+ // mov (16) r82.0<1>:f 0x0:f // $54:&56:
587+ // mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
588+ // mov (16) r83.0<1>:f 0x0:f // $56:&58:
589+ // mov (16|M16) r91.0<1>:f 0x0:f // $57:&59:
590+ // mov (16) r84.0<1>:f 0x0:f // $58:&60:
591+ // mov (16|M16) r92.0<1>:f 0x0:f // $59:&61:
592+ // mov (16) r85.0<1>:f 0x0:f // $60:&62:
593+ // mov (16|M16) r93.0<1>:f 0x0:f // $61:&63:
594+ // mov (16) r86.0<1>:f 0x0:f // $62:&64:
595+ // mov (16|M16) r94.0<1>:f 0x0:f // $63:&65:
596+ // mov (16) r87.0<1>:f 0x0:f // $64:&66:
597+ // mov (16|M16) r95.0<1>:f 0x0:f // $65:&67:
598+ // mov (16) r88.0<1>:f 0x0:f // $66:&68:
599+ // mov (16|M16) r96.0<1>:f 0x0:f // $67:&69:
600+ // ==>
601+ // mov (16) r81.0<1>:f 0x8:f // $52:&54:
602+ // mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
603+ // mov (16) r82.0<1>:f 0x0:f // $54:&56:
604+ // mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
605+ //
606+ // Reuse r81, r89, r82, r90 in the gather send
607+ auto getRemoveableImm = [this ](G4_INST *inst,
608+ std::vector<G4_INST *> &immMovs) { // Returns an earlier recorded imm mov equivalent to inst, else nullptr
609+ G4_Operand *src = inst->getSrc (0 );
610+ int64_t imm = src->asImm ()->getImm (); // src0 is read as an immediate; the visible caller checks isImm() first
611+ for (size_t i = 0 ; i < immMovs.size (); i++) { // Linear scan over the movs recorded so far
612+ G4_INST *imov = immMovs[i];
613+ G4_Operand *isrc = imov->getSrc (0 );
614+ int64_t iimm = isrc->asImm ()->getImm ();
615+ if (imm == iimm &&
616+ src->getType () == isrc->getType () && // Same value and same type
617+ inst->getDst ()->getType () ==
618+ imov->getDst ()->getType () && // Same dst type
619+ inst->getDst ()->asDstRegRegion ()->getHorzStride () ==
620+ imov->getDst ()
621+ ->asDstRegRegion ()
622+ ->getHorzStride () && // Same dst horizontal stride
623+ inst->getExecSize () == imov->getExecSize () && // Same execution size
624+ inst->getMaskOffset () ==
625+ imov->getMaskOffset ()) { // Same mask offset
626+ return imov; // First matching recorded mov wins
627+ }
628+ }
629+ immMovs.push_back (inst); // No match: record inst so later movs can match against it
630+
631+ return (G4_INST *)nullptr ; // Nothing equivalent was recorded
632+ };
633+
583634 // if opndNum + offset is defined multiple times, it cannot be removed
584635 G4_Operand *dst = movInst->getDst ();
585636 unsigned offset = dst->getLeftBound () / builder.getGRFSize ();
@@ -604,6 +655,22 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
604655 movInstNum++;
605656 }
606657 } else {
658+ if (movInst->getSrc (0 ) && movInst->getSrc (0 )->isImm ()) {
659+ // Check if there is a mov instruction with the same imm value
660+ G4_INST *lvnMov = getRemoveableImm (movInst, immMovs);
661+
662+ if (lvnMov) {
663+ // The offset is the offset of original dst, which is used to identify
664+ // the original register used in send.
665+ // The opndNum is the opndNum of send.
666+ regMapBRA regPair (movInst, opndNum, offset,
667+ lvnMov->getDst ()); // the lvn mov dst can be reused
668+ dstSrcRegs.dstSrcMap .push_back (regPair);
669+ firstDefID = std::min (firstDefID, def.first ->getLocalId ());
670+ movInstNum++;
671+ continue ;
672+ }
673+ }
607674 notRemoveableMap.push_back (std::make_pair (opndNum, offset));
608675 }
609676 }
@@ -639,14 +706,14 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
639706 dstSrcRegs.firstDefID = firstDefID;
640707 // Sort according to the register order in the original payload
641708 std::sort (dstSrcRegs.dstSrcMap .begin (), dstSrcRegs.dstSrcMap .end (),
642- regSortCompareBeforeRA );
709+ regSortCompareAfterRA );
643710
644711 return true ;
645712}
646713
647714// Replace the send instruction with the payload of
648715// Insert the scalar register initialization mov instructions.
649- bool SRSubPassBeforeRA::replaceWithSendiBeforeRA (G4_BB *bb,
716+ bool SRSubPassAfterRA::replaceWithSendiAfterRA (G4_BB *bb,
650717 INST_LIST_ITER instIter,
651718 regCandidatesBRA &dstSrcRegs) {
652719 G4_INST *inst = *instIter;
@@ -784,7 +851,7 @@ bool SRSubPassBeforeRA::replaceWithSendiBeforeRA(G4_BB *bb,
784851 return true ;
785852}
786853
787- void SRSubPassBeforeRA::SRSubBeforeRA (G4_BB *bb) {
854+ void SRSubPassAfterRA::SRSubAfterRA (G4_BB *bb) {
788855 bb->resetLocalIds ();
789856
790857 class CmpFirstDef {
@@ -803,7 +870,7 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
803870 G4_INST *inst = *ii;
804871
805872 regCandidatesBRA dstSrcRegs;
806- if (!isSRCandidateBeforeRA (inst, dstSrcRegs)) {
873+ if (!isSRCandidateAfterRA (inst, dstSrcRegs)) {
807874 ii++;
808875 dstSrcRegs.dstSrcMap .clear ();
809876 continue ;
@@ -840,12 +907,26 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
840907 candidatesIt = candidates.find (inst);
841908 // Is candidate send
842909 if (candidatesIt != candidates.end ()) {
843- bool overwrite = false ;
844910 // Scan backward from the send instruction.
845911 INST_LIST_RITER scan_ri = ri;
846912 scan_ri++;
847913 G4_INST *rInst = *scan_ri;
914+
848915 while (rInst->getLocalId () > candidates[inst].firstDefID ) {
916+ if (rInst->isDead ()) {
917+ // If the inst is marked as dead, its dst will not kill any other value.
918+ // For example, in the following case, if the third instruction is removed,
919+ // the r64 value of the first instruction is kept.
920+ // mov (16) r16.0<1>:ud r64.0<1;1,0>:ud // $214:&226:
921+ // mov (16) r17.0<1>:ud r66.0<1;1,0>:ud // $216:&228:
922+ // mov (16) r64.0<1>:ud r68.0<1;1,0>:ud // $218:&230:
923+ scan_ri++;
924+ if (scan_ri == rend) {
925+ break ;
926+ }
927+ rInst = *scan_ri;
928+ continue ;
929+ }
849930 G4_Operand *dst = rInst->getDst ();
850931 if (dst && !dst->isNullReg ()) {
851932 G4_VarBase *base = dst->getBase ();
@@ -879,16 +960,22 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
879960 G4_Operand *dst = rInst->getDst ();
880961 unsigned short dstRegLB = dst->getLinearizedStart ();
881962 unsigned short dstRegRB = dst->getLinearizedEnd ();
882- for (int i = 0 ; i < (int )candidates[inst].dstSrcMap .size (); i++) {
883- int srcRegLB =
884- candidates[inst].dstSrcMap [i].opnd ->getLinearizedStart ();
885- int srcRegRB =
886- candidates[inst].dstSrcMap [i].opnd ->getLinearizedEnd ();
887963
964+ // If there is any non-removable offset, the mov that defines the offset
965+ // cannot be removed.
966+ std::vector<regMapBRA>::iterator dstSrcRegsIter;
967+ for (dstSrcRegsIter = candidates[inst].dstSrcMap .begin ();
968+ dstSrcRegsIter != candidates[inst].dstSrcMap .end ();) {
969+ std::vector<regMapBRA>::iterator nextIter = dstSrcRegsIter;
970+ nextIter++;
971+ int srcRegLB = (*dstSrcRegsIter).opnd ->getLinearizedStart ();
972+ int srcRegRB = (*dstSrcRegsIter).opnd ->getLinearizedEnd ();
888973 if (!(srcRegRB < dstRegLB || srcRegLB > dstRegRB)) {
889974 // Register is reused.
890- overwrite = true ;
891- break ;
975+ dstSrcRegsIter =
976+ candidates[inst].dstSrcMap .erase (dstSrcRegsIter);
977+ } else {
978+ dstSrcRegsIter = nextIter;
892979 }
893980 }
894981 }
@@ -900,22 +987,24 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
900987 }
901988 rInst = *scan_ri;
902989 }
903- if (overwrite) {
990+
991+ // Because s0 needs an extra mov, don't use s0 if at most one mov
992+ // inst can be removed.
993+ if (candidates[inst].dstSrcMap .size () <= 1 &&
994+ builder.getuint32Option (vISA_EnableGatherWithImmPreRA) !=
995+ INDIRECT_TYPE::ALWAYS_S0) {
904996 candidates.erase (candidatesIt);
997+ } else {
998+ for (int j = 0 ; j < (int )candidatesIt->second .dstSrcMap .size (); j++) {
999+ G4_INST *movInst = candidatesIt->second .dstSrcMap [j].inst ;
1000+ movInst->markDead ();
1001+ }
9051002 }
9061003 }
9071004
9081005 ri++;
9091006 }
9101007
911- for (candidatesIt = candidates.begin (); candidatesIt != candidates.end ();
912- candidatesIt++) {
913- for (int i = 0 ; i < (int )candidatesIt->second .dstSrcMap .size (); i++) {
914- G4_INST *movInst = candidatesIt->second .dstSrcMap [i].inst ;
915- movInst->markDead ();
916- }
917- }
918-
9191008 // Replace the send instruction with sendi
9201009 // Remove the mov instructions that marked as dead
9211010 INST_LIST_ITER iter;
@@ -926,7 +1015,7 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
9261015
9271016 candidatesIt = candidates.find (inst);
9281017 if (candidatesIt != candidates.end ()) {
929- replaceWithSendiBeforeRA (bb, curIter, candidates[inst]);
1018+ replaceWithSendiAfterRA (bb, curIter, candidates[inst]);
9301019 }
9311020 if (inst->isDead ()) {
9321021 bb->erase (curIter);
0 commit comments