@@ -1993,15 +1993,12 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
19931993 mulInst->setCondMod (nullptr );
19941994 }
19951995
1996- // create a mach inst
1997- G4_INST* machInst = builder.createBinOp (G4_mach, mulInst->getExecSize (),
1998- origDst, builder.duplicateOperand (src0), builder.duplicateOperand (src1), origOptions, false );
1996+ // create a macl inst
1997+ G4_INST* machInst = builder.createMacl ( mulInst->getExecSize (),
1998+ origDst, builder.duplicateOperand (src0), builder.duplicateOperand (src1), origOptions, accType );
19991999 machInst->setPredicate (predicate);
20002000
20012001 // maintain du chain as fixAccDst uses it later
2002- G4_SrcRegRegion* accSrcOpnd = builder.createSrcRegRegion (Mod_src_undef, Direct,
2003- builder.phyregpool .getAcc0Reg (), 0 , 0 , builder.getRegionStride1 (), accType);
2004- machInst->setImplAccSrc (accSrcOpnd);
20052002 mulInst->addDefUse (machInst, Opnd_implAccSrc);
20062003
20072004 INST_LIST_ITER machIter = it;
@@ -2228,10 +2225,8 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
22282225 }
22292226
22302227 // create a mach inst
2231- G4_INST* newInst = builder.createBinOp (G4_mach, exec_size, machDst,
2232- builder.duplicateOperand (src0), builder.duplicateOperand (src1), inst_opt, false );
2233-
2234- newInst->setOptionOn (InstOpt_AccWrCtrl);
2228+ G4_INST* newInst = builder.createMach (exec_size, machDst,
2229+ builder.duplicateOperand (src0), builder.duplicateOperand (src1), inst_opt, tmp_type);
22352230
22362231 INST_LIST_ITER iter = i;
22372232 iter++;
@@ -2244,16 +2239,11 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
22442239 inst->transferUse (newInst);
22452240 inst->addDefUse (newInst, Opnd_implAccSrc);
22462241
2247- // create an implicit source for MACH
2242+ // create an explicit acc source for later use
22482243 const RegionDesc* rd = exec_size > 1 ? builder.getRegionStride1 () : builder.getRegionScalar ();
22492244 G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion (Mod_src_undef, Direct,
22502245 builder.phyregpool .getAcc0Reg (), 0 , 0 , rd, tmp_type);
22512246
2252- newInst->setImplAccSrc (acc_src_opnd);
2253-
2254- // set an implicit dst for MACH
2255- newInst->setImplAccDst (builder.createDstRegRegion (*acc_dst_opnd));
2256-
22572247 insertedInst = true ;
22582248
22592249 if (IS_QTYPE (dst->getType ()))
@@ -2508,54 +2498,48 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25082498
25092499 bb->insertBefore (iter, newMul);
25102500 inst->copyDefsTo (newMul, false );
2511- newMul->addDefUse (inst, Opnd_implAccSrc);
25122501
2513- iter = i;
2514- iter--;
2515- fixMulSrc1 (iter, bb);
25162502
2503+ fixMulSrc1 (std::prev (i), bb);
25172504 newMul->setNoMask (true );
25182505
2519- inst->setOpcode (G4_mach);
2520-
2506+ auto machSrc1 = inst->getSrc (1 );
25212507 if (src1->isImm () && src0->getType () != src1->getType ())
25222508 {
25232509 G4_Imm* oldImm = src1->asImm ();
25242510 // Ensure src1 has the same type as src0.
2525- G4_Imm* newImm = builder.createImm (oldImm->getInt (), src0->getType ());
2526- inst->setSrc (newImm, 1 );
2511+ machSrc1 = builder.createImm (oldImm->getInt (), src0->getType ());
25272512 }
25282513 else if (!IS_DTYPE (src1->getType ()))
25292514 {
25302515 // this can happen due to vISA opt, convert them to src0 type which should be D/UD
25312516 // We use D as the tmp type to make sure we can represent all src1 values
2532- auto isSrc1NonScalar = inst->getSrc (1 )->isSrcRegRegion () && !inst->getSrc (1 )->asSrcRegRegion ()->isScalar ();
2533- auto newSrc = insertMovBefore (i, 1 , Type_D, bb);
2534- inst->setSrc (builder.createSrcRegRegion (Mod_src_undef, Direct, newSrc->getTopDcl ()->getRegVar (), 0 , 0 ,
2535- isSrc1NonScalar ? builder.getRegionStride1 () : builder.getRegionScalar (), src0->getType ()), 1 );
2517+ machSrc1 = insertMovBefore (i, 1 , Type_D, bb);
25362518 }
25372519
2538- // set implicit src/dst for mach
2539- const RegionDesc* rd = exec_size > 1 ? builder.getRegionStride1 () : builder.getRegionScalar ();
2540- G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion (Mod_src_undef, Direct, builder.phyregpool .getAcc0Reg (), 0 , 0 , rd, tmp_type);
2541- inst->setImplAccSrc (acc_src_opnd);
2542- inst->setImplAccDst (builder.createDstRegRegion (*acc_dst_opnd));
2520+ // We don't duplicate the operands here as original inst is unlinked
2521+ // ToDo: this invalidates the du-chain; do we still need to maintain it?
2522+ auto machInst = builder.createMach (inst->getExecSize (), inst->getDst (), inst->getSrc (0 ), machSrc1, inst_opt, tmp_type);
2523+ machInst->setPredicate (inst->getPredicate ());
2524+ machInst->setCondMod (inst->getCondMod ());
2525+ *i = machInst;
2526+ inst->transferUse (machInst);
2527+ inst->removeAllDefs ();
2528+ newMul->addDefUse (machInst, Opnd_implAccSrc);
25432529
25442530 INST_LIST_ITER end_iter = i;
25452531 // check if the ACC source is aligned to mach dst
2532+ // ToDo: this should be checked by fixAcc?
25462533 G4_DstRegRegion* dst = inst->getDst ();
2547- if ((inst->getSaturate ()) ||
2548- (dst &&
2549- ((dst->getExecTypeSize () > G4_Type_Table[Type_D].byteSize ) ||
2550- (isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst)))))
2534+ if (inst->getSaturate () ||
2535+ dst->getExecTypeSize () > G4_Type_Table[Type_D].byteSize ||
2536+ isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
25512537 {
25522538 // add a tmp mov
2553- inst ->setDest (insertMovAfter (i, dst, dst->getType (), bb));
2539+ machInst ->setDest (insertMovAfter (i, dst, dst->getType (), bb));
25542540 end_iter++;
25552541 }
25562542
2557- inst->setOptionOn (InstOpt_AccWrCtrl);
2558-
25592543 if (exec_size > builder.getNativeExecSize ())
25602544 {
25612545 auto start_iter = std::prev (i);
0 commit comments