@@ -669,25 +669,36 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
669669 return (G4_INST *)nullptr ;
670670 };
671671
672- // if opndNum + offset is defined multiple times, cannobe be removed
673- G4_Operand *dst = movInst->getDst ();
674- unsigned offset = dst->getLeftBound () / builder.getGRFSize ();
675672 // The source opndNum of send instruction which was defined
676673 Gen4_Operand_Number opndNum = (*I).second ;
674+ unsigned srcStartGRF = inst->getOperand (opndNum)->getLinearizedStart () /
675+ builder.getGRFSize ();
676+ // if opndNum + offset is defined multiple times, cannot be removed
677+ G4_Operand *dst = movInst->getDst ();
678+ unsigned startOffset =
679+ (dst->getLinearizedStart () / builder.getGRFSize ()) - srcStartGRF;
680+ unsigned dstSize = (dst->getLinearizedEnd () - dst->getLinearizedStart () +
681+ builder.getGRFSize () - 1 ) /
682+ builder.getGRFSize ();
677683
678684 if (isRemoveAble (movInst)) {
679685 auto iter = std::find_if (
680686 dstSrcRegs.dstSrcMap .begin (), dstSrcRegs.dstSrcMap .end (),
681- [opndNum, offset](regMapBRA regmap) {
682- return regmap.opndNum == opndNum &&
683- regmap.offset == offset;
684- });
687+ [opndNum, dst](regMapBRA regmap) {
688+ return regmap.opndNum == opndNum &&
689+ !((regmap.inst ->getDst ()->getLinearizedStart () >
690+ dst->getLinearizedEnd ()) ||
691+ (dst->getLinearizedStart () >
692+ regmap.inst ->getDst ()->getLinearizedEnd ()));
693+ });
685694 // if multiple defined, cannot be removed
686695 if (iter != dstSrcRegs.dstSrcMap .end ()) {
687- notRemoveableMap.push_back (std::make_pair (opndNum, offset));
696+ for (unsigned offset = startOffset; offset < dstSize; offset++) {
697+ notRemoveableMap.push_back (std::make_pair (opndNum, offset));
698+ }
688699 } else {
689700 G4_Operand *src = movInst->getSrc (0 );
690- regMapBRA regPair (movInst, opndNum, offset , src);// mov source
701+ regMapBRA regPair (movInst, opndNum, startOffset , src); // mov source
691702 dstSrcRegs.dstSrcMap .push_back (regPair);
692703 firstDefID = std::min (firstDefID, def.first ->getLocalId ());
693704 movInstNum++;
@@ -702,15 +713,17 @@ bool SRSubPassAfterRA::isSRCandidateAfterRA(G4_INST *inst,
702713 // The offset is the offset of original dst, which is used to identify
703714 // the original register used in send.
704715 // The opndNum is the opndNum of send.
705- regMapBRA regPair (movInst, opndNum, offset ,
716+ regMapBRA regPair (movInst, opndNum, startOffset ,
706717 lvnMov->getDst ()); // the lvn mov dst can be reused
707718 dstSrcRegs.dstSrcMap .push_back (regPair);
708719 firstDefID = std::min (firstDefID, def.first ->getLocalId ());
709720 movInstNum++;
710721 continue ;
711722 }
712723 }
713- notRemoveableMap.push_back (std::make_pair (opndNum, offset));
724+ for (unsigned offset = startOffset; offset < dstSize; offset++) {
725+ notRemoveableMap.push_back (std::make_pair (opndNum, offset));
726+ }
714727 }
715728 }
716729
@@ -776,9 +789,10 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
776789 bool replaced = false ;
777790 if (j < (int )dstSrcRegs.dstSrcMap .size () &&
778791 dstSrcRegs.dstSrcMap [j].opndNum == Opnd_src0) {
779- int opndSize = (dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedEnd () -
780- dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedStart () + 1 ) /
781- GRFSize;
792+ int opndSize =
793+ (dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedEnd () -
794+ dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedStart () + GRFSize - 1 ) /
795+ GRFSize;
782796 int srcOffset = src0->getLeftBound () / GRFSize + i;
783797 int opndOffset = dstSrcRegs.dstSrcMap [j].offset ;
784798
@@ -819,9 +833,10 @@ bool SRSubPassAfterRA::replaceWithSendiAfterRA(G4_BB *bb,
819833 bool replaced = false ;
820834 if (j < (int )dstSrcRegs.dstSrcMap .size () &&
821835 dstSrcRegs.dstSrcMap [j].opndNum == Opnd_src1) {
822- int opndSize = (dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedEnd () -
823- dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedStart () + 1 ) /
824- GRFSize;
836+ int opndSize =
837+ (dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedEnd () -
838+ dstSrcRegs.dstSrcMap [j].opnd ->getLinearizedStart () + GRFSize - 1 ) /
839+ GRFSize;
825840 int srcOffset = src1->getLeftBound () / GRFSize + i;
826841 int opndOffset = dstSrcRegs.dstSrcMap [j].offset ;
827842
0 commit comments