@@ -19,7 +19,7 @@ static bool regSortCompare(regMap map1, regMap map2) {
1919 return false ;
2020}
2121
22- static bool regSortCompareBeforeRA (regMapBRA map1, regMapBRA map2) {
22+ static bool regSortCompareAfterRA (regMapBRA map1, regMapBRA map2) {
2323 if (map1.opndNum < map2.opndNum ) {
2424 return true ;
2525 } else if (map1.opndNum > map2.opndNum ) {
@@ -430,7 +430,7 @@ void SRSubPass::SRSub(G4_BB *bb) {
430430
431431// Check whether the current instruction is a candidate for sendi.
432432// If it is, it is recorded as a candidate.
433- bool SRSubPassBeforeRA::isSRCandidateBeforeRA (G4_INST *inst,
433+ bool SRSubPassAfterRA::isSRCandidateAfterRA (G4_INST *inst,
434434 regCandidatesBRA &dstSrcRegs) {
435435 if (!inst->isSend ()) {
436436 return false ;
@@ -482,6 +482,7 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
482482 int movInstNum = 0 ;
483483 int32_t firstDefID = 0x7FFFFFFF ; // the ID of the first instruction define the
484484 std::vector<std::pair<Gen4_Operand_Number, unsigned >> notRemoveableMap;
485+ std::vector<G4_INST *> immMovs;
485486 for (auto I = inst->def_begin (), E = inst->def_end (); I != E; ++I) {
486487 auto &&def = *I;
487488
@@ -572,14 +573,90 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
572573
573574 // It's not global define
574575 if (!(builder.getIsKernel () && kernel.fg .getNumBB () == 1 )) {
575- if (kernel.fg .globalOpndHT .isOpndGlobal (dstRgn)) {
576+ if (kernel.fg .globalOpndHT .isOpndGlobal (dstRgn) && !dstRgn-> getTopDcl ()-> getIsBBLocal () ) {
576577 return false ;
577578 }
578579 }
579580
580581 return true ;
581582 };
582583
584+ // mov (16) r81.0<1>:f 0x8:f // $52:&54:
585+ // mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
586+ // mov (16) r82.0<1>:f 0x0:f // $54:&56:
587+ // mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
588+ // mov (16) r83.0<1>:f 0x0:f // $56:&58:
589+ // mov (16|M16) r91.0<1>:f 0x0:f // $57:&59:
590+ // mov (16) r84.0<1>:f 0x0:f // $58:&60:
591+ // mov (16|M16) r92.0<1>:f 0x0:f // $59:&61:
592+ // mov (16) r85.0<1>:f 0x0:f // $60:&62:
593+ // mov (16|M16) r93.0<1>:f 0x0:f // $61:&63:
594+ // mov (16) r86.0<1>:f 0x0:f // $62:&64:
595+ // mov (16|M16) r94.0<1>:f 0x0:f // $63:&65:
596+ // mov (16) r87.0<1>:f 0x0:f // $64:&66:
597+ // mov (16|M16) r95.0<1>:f 0x0:f // $65:&67:
598+ // mov (16) r88.0<1>:f 0x0:f // $66:&68:
599+ // mov (16|M16) r96.0<1>:f 0x0:f // $67:&69:
600+ // ==>
601+ // mov (16) r81.0<1>:f 0x8:f // $52:&54:
602+ // mov (16|M16) r89.0<1>:f 0x8:f // $53:&55:
603+ // mov (16) r82.0<1>:f 0x0:f // $54:&56:
604+ // mov (16|M16) r90.0<1>:f 0x0:f // $55:&57:
605+ // The gather send can then reuse r81, r89, r82, r90. getRemoveableImm returns an earlier mov in
606+ // immMovs that is equivalent to inst, else nullptr (appending inst if it passed all the checks).
607+ auto getRemoveableImm = [this ](G4_INST *inst,
608+ std::vector<G4_INST *> &immMovs) {
609+ // Require exactly one use: the instruction is only used for payload preparation.
610+ if (inst->use_size () != 1 ) {
611+ return (G4_INST *)nullptr ;
612+ }
613+
614+ G4_DstRegRegion *dst = inst->getDst ();
615+ // dst must start at sub-register offset 0 and be contiguous (horizontal stride 1)
616+ if (dst->getSubRegOff () || dst->getHorzStride () != 1 ) {
617+ return (G4_INST *)nullptr ;
618+ }
619+
620+ if (kernel.fg .globalOpndHT .isOpndGlobal (dst)) { // global destinations are rejected
621+ return (G4_INST *)nullptr ;
622+ }
623+
624+ // The linearized start (physical register assigned) must be GRF-aligned
625+ if (dst->getLinearizedStart () % builder.getGRFSize () != 0 ) {
626+ return (G4_INST *)nullptr ;
627+ }
628+
629+ // Reject if the destination operand covers less than 1 GRF
630+ if ((dst->getLinearizedEnd () - dst->getLinearizedStart () + 1 ) <
631+ builder.getGRFSize ()) {
632+ return (G4_INST *)nullptr ;
633+ }
634+
635+ G4_Operand *src = inst->getSrc (0 );
636+ int64_t imm = src->asImm ()->getImm (); // the visible caller checks isImm() on src0 before calling
637+ for (size_t i = 0 ; i < immMovs.size (); i++) {
638+ G4_INST *imov = immMovs[i];
639+ G4_Operand *isrc = imov->getSrc (0 );
640+ int64_t iimm = isrc->asImm ()->getImm ();
641+ if (imm == iimm &&
642+ src->getType () == isrc->getType () && // Same value and same type
643+ inst->getDst ()->getType () ==
644+ imov->getDst ()->getType () && // Same dst type
645+ inst->getDst ()->asDstRegRegion ()->getHorzStride () ==
646+ imov->getDst ()
647+ ->asDstRegRegion ()
648+ ->getHorzStride () && // Same region
649+ inst->getExecSize () == imov->getExecSize () && // Same execution size
650+ inst->getMaskOffset () ==
651+ imov->getMaskOffset ()) { // Same mask offset
652+ return imov;
653+ }
654+ }
655+ immMovs.push_back (inst); // no equivalent mov recorded yet: remember inst for later lookups
656+
657+ return (G4_INST *)nullptr ;
658+ };
659+
583660 // If opndNum + offset is defined multiple times, it cannot be removed
584661 G4_Operand *dst = movInst->getDst ();
585662 unsigned offset = dst->getLeftBound () / builder.getGRFSize ();
@@ -604,6 +681,22 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
604681 movInstNum++;
605682 }
606683 } else {
684+ if (movInst->getSrc (0 ) && movInst->getSrc (0 )->isImm ()) {
685+ // Check if there is mov instruction with same imm value
686+ G4_INST *lvnMov = getRemoveableImm (movInst, immMovs);
687+
688+ if (lvnMov) {
689+ // The offset is the offset of original dst, which is used to identify
690+ // the original register used in send.
691+ // The opndNum is the opndNum of send.
692+ regMapBRA regPair (movInst, opndNum, offset,
693+ lvnMov->getDst ()); // the lvn mov dst can be reused
694+ dstSrcRegs.dstSrcMap .push_back (regPair);
695+ firstDefID = std::min (firstDefID, def.first ->getLocalId ());
696+ movInstNum++;
697+ continue ;
698+ }
699+ }
607700 notRemoveableMap.push_back (std::make_pair (opndNum, offset));
608701 }
609702 }
@@ -639,14 +732,14 @@ bool SRSubPassBeforeRA::isSRCandidateBeforeRA(G4_INST *inst,
639732 dstSrcRegs.firstDefID = firstDefID;
640733 // Sort according to the register order in the original payload
641734 std::sort (dstSrcRegs.dstSrcMap .begin (), dstSrcRegs.dstSrcMap .end (),
642- regSortCompareBeforeRA );
735+ regSortCompareAfterRA );
643736
644737 return true ;
645738}
646739
647740// Replace the send instruction with the payload of
648741// Insert the scalar register initialization mov instructions.
649- bool SRSubPassBeforeRA::replaceWithSendiBeforeRA (G4_BB *bb,
742+ bool SRSubPassAfterRA::replaceWithSendiAfterRA (G4_BB *bb,
650743 INST_LIST_ITER instIter,
651744 regCandidatesBRA &dstSrcRegs) {
652745 G4_INST *inst = *instIter;
@@ -784,7 +877,7 @@ bool SRSubPassBeforeRA::replaceWithSendiBeforeRA(G4_BB *bb,
784877 return true ;
785878}
786879
787- void SRSubPassBeforeRA::SRSubBeforeRA (G4_BB *bb) {
880+ void SRSubPassAfterRA::SRSubAfterRA (G4_BB *bb) {
788881 bb->resetLocalIds ();
789882
790883 class CmpFirstDef {
@@ -803,7 +896,7 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
803896 G4_INST *inst = *ii;
804897
805898 regCandidatesBRA dstSrcRegs;
806- if (!isSRCandidateBeforeRA (inst, dstSrcRegs)) {
899+ if (!isSRCandidateAfterRA (inst, dstSrcRegs)) {
807900 ii++;
808901 dstSrcRegs.dstSrcMap .clear ();
809902 continue ;
@@ -840,12 +933,26 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
840933 candidatesIt = candidates.find (inst);
841934 // Is candidate send
842935 if (candidatesIt != candidates.end ()) {
843- bool overwrite = false ;
844936 // Scan backward from the send instruction.
845937 INST_LIST_RITER scan_ri = ri;
846938 scan_ri++;
847939 G4_INST *rInst = *scan_ri;
940+
848941 while (rInst->getLocalId () > candidates[inst].firstDefID ) {
942+ if (rInst->isDead ()) {
943+ // If the inst is marked as dead, its dst will not kill any other value.
944+ // For example, in the following case, if the third instruction is removed,
945+ // the r64 value used by the first instruction is kept.
946+ // mov (16) r16.0<1>:ud r64.0<1;1,0>:ud // $214:&226:
947+ // mov (16) r17.0<1>:ud r66.0<1;1,0>:ud // $216:&228:
948+ // mov (16) r64.0<1>:ud r68.0<1;1,0>:ud // $218:&230:
949+ scan_ri++;
950+ if (scan_ri == rend) {
951+ break ;
952+ }
953+ rInst = *scan_ri;
954+ continue ;
955+ }
849956 G4_Operand *dst = rInst->getDst ();
850957 if (dst && !dst->isNullReg ()) {
851958 G4_VarBase *base = dst->getBase ();
@@ -879,16 +986,22 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
879986 G4_Operand *dst = rInst->getDst ();
880987 unsigned short dstRegLB = dst->getLinearizedStart ();
881988 unsigned short dstRegRB = dst->getLinearizedEnd ();
882- for (int i = 0 ; i < (int )candidates[inst].dstSrcMap .size (); i++) {
883- int srcRegLB =
884- candidates[inst].dstSrcMap [i].opnd ->getLinearizedStart ();
885- int srcRegRB =
886- candidates[inst].dstSrcMap [i].opnd ->getLinearizedEnd ();
887989
990+ // If there is any non-removable offset, the mov that defines that
991+ // offset cannot be removed.
992+ std::vector<regMapBRA>::iterator dstSrcRegsIter;
993+ for (dstSrcRegsIter = candidates[inst].dstSrcMap .begin ();
994+ dstSrcRegsIter != candidates[inst].dstSrcMap .end ();) {
995+ std::vector<regMapBRA>::iterator nextIter = dstSrcRegsIter;
996+ nextIter++;
997+ int srcRegLB = (*dstSrcRegsIter).opnd ->getLinearizedStart ();
998+ int srcRegRB = (*dstSrcRegsIter).opnd ->getLinearizedEnd ();
888999 if (!(srcRegRB < dstRegLB || srcRegLB > dstRegRB)) {
8891000 // Register is reused.
890- overwrite = true ;
891- break ;
1001+ dstSrcRegsIter =
1002+ candidates[inst].dstSrcMap .erase (dstSrcRegsIter);
1003+ } else {
1004+ dstSrcRegsIter = nextIter;
8921005 }
8931006 }
8941007 }
@@ -900,22 +1013,24 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
9001013 }
9011014 rInst = *scan_ri;
9021015 }
903- if (overwrite) {
1016+
1017+ // Because s0 needs an extra mov, don't use s0 if at most one mov
1018+ // inst can be removed.
1019+ if (candidates[inst].dstSrcMap .size () <= 1 &&
1020+ builder.getuint32Option (vISA_EnableGatherWithImmPreRA) !=
1021+ INDIRECT_TYPE::ALWAYS_S0) {
9041022 candidates.erase (candidatesIt);
1023+ } else {
1024+ for (int j = 0 ; j < (int )candidatesIt->second .dstSrcMap .size (); j++) {
1025+ G4_INST *movInst = candidatesIt->second .dstSrcMap [j].inst ;
1026+ movInst->markDead ();
1027+ }
9051028 }
9061029 }
9071030
9081031 ri++;
9091032 }
9101033
911- for (candidatesIt = candidates.begin (); candidatesIt != candidates.end ();
912- candidatesIt++) {
913- for (int i = 0 ; i < (int )candidatesIt->second .dstSrcMap .size (); i++) {
914- G4_INST *movInst = candidatesIt->second .dstSrcMap [i].inst ;
915- movInst->markDead ();
916- }
917- }
918-
9191034 // Replace the send instruction with sendi
9201035 // Remove the mov instructions that marked as dead
9211036 INST_LIST_ITER iter;
@@ -926,7 +1041,7 @@ void SRSubPassBeforeRA::SRSubBeforeRA(G4_BB *bb) {
9261041
9271042 candidatesIt = candidates.find (inst);
9281043 if (candidatesIt != candidates.end ()) {
929- replaceWithSendiBeforeRA (bb, curIter, candidates[inst]);
1044+ replaceWithSendiAfterRA (bb, curIter, candidates[inst]);
9301045 }
9311046 if (inst->isDead ()) {
9321047 bb->erase (curIter);
0 commit comments