@@ -7948,24 +7948,51 @@ INST_LIST_ITER HWConformity::fixMadwInst(INST_LIST_ITER it, G4_BB* bb)
79487948 // sat cannot be used at all in the macro sequence
79497949 // make the dst GRF-aligned before expanding to macro
79507950 if (madwInst->getSaturate () ||
7951+ dst->getHorzStride () != 1 ||
79517952 isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
79527953 !builder.isOpndAligned (dst, getGRFSize ()))
79537954 {
7954- // add a tmp mov
7955- madwInst->setDest (insertMovAfter (it, dst, dst->getType (), bb, GRFALIGN));
7956- dst = madwInst->getDst ();
7955+ // add tmp mov instructions
7956+ int dstLowGRFNum = (int )std::ceil ((float )(execSize * dst->getExecTypeSize ()) / getGRFSize ());
7957+ int dstTotalGRFNum = dstLowGRFNum * 2 ;
7958+
7959+ G4_Declare* newDstDcl = builder.createTempVar (numEltPerGRF (dst->getType ()) * dstTotalGRFNum, dst->getType (), GRFALIGN);
7960+
7961+ // add a tmp mov for low results in dst
7962+ G4_Declare* lowMovSrcDcl = builder.createTempVar (numEltPerGRF (dst->getType ()) * dstLowGRFNum, dst->getType (), GRFALIGN);
7963+ lowMovSrcDcl->setAliasDeclare (newDstDcl, 0 );
7964+ G4_SrcRegRegion* lowMovSrc = builder.createSrcRegRegion (lowMovSrcDcl, builder.getRegionStride1 ());
7965+ auto dstLow = builder.createDst (dst->getBase (), dst->getRegOff (), dst->getSubRegOff (), dst->getHorzStride (), dst->getType ());
7966+ G4_INST* lowMovInst = builder.createMov (execSize, dstLow, lowMovSrc, madwInst->getMaskOption (), false );
7967+ lowMovInst->setPredicate (madwInst->getPredicate ());
7968+ lowMovInst->setSaturate (madwInst->getSaturate ());
7969+ auto insertIter = bb->insertAfter (it, lowMovInst);
7970+ maintainDU4TempMov (madwInst, lowMovInst);
7971+
7972+ // add a tmp mov for high results in dst
7973+ G4_Declare* hiMovSrcDcl = builder.createTempVar (numEltPerGRF (dst->getType ()) * dstLowGRFNum, dst->getType (), GRFALIGN);
7974+ hiMovSrcDcl->setAliasDeclare (newDstDcl, dstLowGRFNum * getGRFSize ());
7975+ G4_SrcRegRegion* hiMovSrc = builder.createSrcRegRegion (hiMovSrcDcl, builder.getRegionStride1 ());
7976+ auto dstHi = builder.createDst (dst->getBase (), dst->getRegOff () + dstLowGRFNum, dst->getSubRegOff (), dst->getHorzStride (), dst->getType ());
7977+ G4_INST* hiMovInst = builder.createMov (execSize, dstHi, hiMovSrc, madwInst->getMaskOption (), false );
7978+ hiMovInst->setPredicate (madwInst->getPredicate ());
7979+ hiMovInst->setSaturate (madwInst->getSaturate ());
7980+ bb->insertAfter (insertIter, hiMovInst);
7981+ maintainDU4TempMov (madwInst, hiMovInst);
7982+
7983+ G4_DstRegRegion* newDst = builder.createDstRegRegion (newDstDcl, 1 );
7984+ madwInst->setDest (newDst);
7985+ madwInst->setPredicate (nullptr );
7986+ madwInst->setSaturate (g4::NOSAT);
7987+ dst = newDst;
79577988 }
79587989
7959- // G4_Type tmpType = (IS_UNSIGNED_INT(src0->getType()) && IS_UNSIGNED_INT(src1->getType()) && IS_UNSIGNED_INT(src2->getType())) ? Type_UD : Type_D;
79607990 INST_LIST_ITER retIter = it;
79617991 if (builder.noMulOrMadwExpandingBeforeScheduler () && builder.getOption (vISA_expandMadwPostSchedule))
79627992 {
79637993 // Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Madw->Mul+Mach+Addc+Add expanding
79647994 // will be done in expandMadwPostSchedule pass.
79657995
7966- // sat has bee resolved above, here just set it as NOSAT
7967- madwInst->setSaturate (g4::NOSAT);
7968-
79697996 // need extra mov if dst is acc and src0 is indirect
79707997 if (!builder.accDstforIndirectSrc ())
79717998 {
0 commit comments