Skip to content

Commit 66d1f97

Browse files
weiyu-chen authored and sys_zuul committed
Add createMach/createMacl wrapper.
Change-Id: Ie106d546940317a3f112a0707007d2501da9d4d1
1 parent f56e179 commit 66d1f97

File tree

3 files changed

+57
-40
lines changed

3 files changed

+57
-40
lines changed

visa/BuildIR.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,6 +1860,12 @@ class IR_Builder {
18601860
G4_INST* createBinOp(G4_opcode op, uint8_t execSize, G4_DstRegRegion* dst,
18611861
G4_Operand* src0, G4_Operand* src1, uint32_t option, bool appendToInstList);
18621862

1863+
// Builds a mach instruction from dst/src0/src1/option; accType is the type given to the
// implicit acc source and implicit acc destination that the wrapper attaches.
G4_INST* createMach(uint8_t execSize, G4_DstRegRegion* dst,
1864+
G4_Operand* src0, G4_Operand* src1, uint32_t option, G4_Type accType);
1865+
1866+
// Builds a macl instruction from dst/src0/src1/option; accType is the type given to the
// implicit acc source that the wrapper attaches (no implicit acc destination is set).
G4_INST* createMacl(uint8_t execSize, G4_DstRegRegion* dst,
1867+
G4_Operand* src0, G4_Operand* src1, uint32_t option, G4_Type accType);
1868+
18631869
static G4_MathOp Get_MathFuncCtrl(ISA_Opcode op, G4_Type type);
18641870

18651871
void resizePredefinedStackVars();

visa/BuildIRImpl.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,33 @@ G4_INST* IR_Builder::createBinOp(G4_opcode op, uint8_t execSize, G4_DstRegRegion
391391
}
392392
}
393393

394+
// createMach builds a mach instruction and attaches both an implicit acc source and an
// implicit acc destination, each typed with the supplied accType. AccWrCtrl is turned on.
395+
// acc0.0 is always used. Returns the newly created instruction.
396+
G4_INST* IR_Builder::createMach(uint8_t execSize, G4_DstRegRegion* dst,
397+
G4_Operand* src0, G4_Operand* src1, uint32_t option, G4_Type accType)
398+
{
399+
// NOTE(review): the nullptr / nullptr / false arguments are presumably predicate,
// condition modifier and saturate; callers set predicate/cond-mod afterwards -- confirm.
auto machInst = createInternalInst(nullptr, G4_mach, nullptr, false, execSize, dst, src0, src1, option);
400+
// Stride-1 region when more than one channel is executed, scalar region otherwise.
const RegionDesc* rd = execSize > 1 ? getRegionStride1() : getRegionScalar();
401+
// Implicit acc source: acc0 at offsets (0, 0) with the region chosen above.
auto accSrc = createSrcRegRegion(Mod_src_undef, Direct, phyregpool.getAcc0Reg(), 0, 0, rd, accType);
402+
machInst->setImplAccSrc(accSrc);
403+
// Implicit acc destination: acc0 at offsets (0, 0); the literal 1 is presumably the
// horizontal stride -- TODO confirm against createDst.
auto accDSt = createDst(phyregpool.getAcc0Reg(), 0, 0, 1, accType);
404+
machInst->setImplAccDst(accDSt);
405+
// mach must also write the accumulator, hence AccWrCtrl.
machInst->setOptionOn(InstOpt_AccWrCtrl);
406+
return machInst;
407+
}
408+
409+
// createMacl builds a macl instruction and attaches an implicit acc source typed with the
// supplied accType. Unlike createMach, no implicit acc destination is set and AccWrCtrl is not set.
410+
// acc0.0 is always used. Returns the newly created instruction.
411+
G4_INST* IR_Builder::createMacl(uint8_t execSize, G4_DstRegRegion* dst,
412+
G4_Operand* src0, G4_Operand* src1, uint32_t option, G4_Type accType)
413+
{
414+
// NOTE(review): the opcode passed here is G4_mach, not a distinct macl opcode; presumably
// macl is represented as a mach without AccWrCtrl -- confirm this is intentional.
auto maclInst = createInternalInst(nullptr, G4_mach, nullptr, false, execSize, dst, src0, src1, option);
415+
// Stride-1 region when more than one channel is executed, scalar region otherwise.
const RegionDesc* rd = execSize > 1 ? getRegionStride1() : getRegionScalar();
416+
// Implicit acc source: acc0 at offsets (0, 0) with the region chosen above.
auto accSrc = createSrcRegRegion(Mod_src_undef, Direct, phyregpool.getAcc0Reg(), 0, 0, rd, accType);
417+
maclInst->setImplAccSrc(accSrc);
418+
return maclInst;
419+
}
420+
394421
G4_INST* IR_Builder::createIf(G4_Predicate* prd, uint8_t size, uint32_t option)
395422
{
396423
auto inst = createCFInst(prd, G4_if, size, nullptr, nullptr, option);

visa/HWConformity.cpp

Lines changed: 24 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1993,15 +1993,12 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
19931993
mulInst->setCondMod(nullptr);
19941994
}
19951995

1996-
// create a mach inst
1997-
G4_INST* machInst = builder.createBinOp(G4_mach, mulInst->getExecSize(),
1998-
origDst, builder.duplicateOperand(src0), builder.duplicateOperand(src1), origOptions, false);
1996+
// create a macl inst
1997+
G4_INST* machInst = builder.createMacl(mulInst->getExecSize(),
1998+
origDst, builder.duplicateOperand(src0), builder.duplicateOperand(src1), origOptions, accType);
19991999
machInst->setPredicate(predicate);
20002000

20012001
// maintain du chain as fixAccDst uses it later
2002-
G4_SrcRegRegion* accSrcOpnd = builder.createSrcRegRegion(Mod_src_undef, Direct,
2003-
builder.phyregpool.getAcc0Reg(), 0, 0, builder.getRegionStride1(), accType);
2004-
machInst->setImplAccSrc(accSrcOpnd);
20052002
mulInst->addDefUse(machInst, Opnd_implAccSrc);
20062003

20072004
INST_LIST_ITER machIter = it;
@@ -2228,10 +2225,8 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
22282225
}
22292226

22302227
// create a mach inst
2231-
G4_INST* newInst = builder.createBinOp(G4_mach, exec_size, machDst,
2232-
builder.duplicateOperand(src0), builder.duplicateOperand(src1), inst_opt, false);
2233-
2234-
newInst->setOptionOn(InstOpt_AccWrCtrl);
2228+
G4_INST* newInst = builder.createMach(exec_size, machDst,
2229+
builder.duplicateOperand(src0), builder.duplicateOperand(src1), inst_opt, tmp_type);
22352230

22362231
INST_LIST_ITER iter = i;
22372232
iter++;
@@ -2244,16 +2239,11 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
22442239
inst->transferUse(newInst);
22452240
inst->addDefUse(newInst, Opnd_implAccSrc);
22462241

2247-
// create an implicit source for MACH
2242+
// create an explicit acc source for later use
22482243
const RegionDesc* rd = exec_size > 1 ? builder.getRegionStride1() : builder.getRegionScalar();
22492244
G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion(Mod_src_undef, Direct,
22502245
builder.phyregpool.getAcc0Reg(), 0, 0, rd, tmp_type);
22512246

2252-
newInst->setImplAccSrc(acc_src_opnd);
2253-
2254-
// set an implicit dst for MACH
2255-
newInst->setImplAccDst(builder.createDstRegRegion(*acc_dst_opnd));
2256-
22572247
insertedInst = true;
22582248

22592249
if (IS_QTYPE(dst->getType()))
@@ -2508,54 +2498,48 @@ void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25082498

25092499
bb->insertBefore(iter, newMul);
25102500
inst->copyDefsTo(newMul, false);
2511-
newMul->addDefUse(inst, Opnd_implAccSrc);
25122501

2513-
iter = i;
2514-
iter--;
2515-
fixMulSrc1(iter, bb);
25162502

2503+
fixMulSrc1(std::prev(i), bb);
25172504
newMul->setNoMask(true);
25182505

2519-
inst->setOpcode(G4_mach);
2520-
2506+
auto machSrc1 = inst->getSrc(1);
25212507
if (src1->isImm() && src0->getType() != src1->getType())
25222508
{
25232509
G4_Imm* oldImm = src1->asImm();
25242510
// Ensure src1 has the same type as src0.
2525-
G4_Imm* newImm = builder.createImm(oldImm->getInt(), src0->getType());
2526-
inst->setSrc(newImm, 1);
2511+
machSrc1 = builder.createImm(oldImm->getInt(), src0->getType());
25272512
}
25282513
else if (!IS_DTYPE(src1->getType()))
25292514
{
25302515
// this can happen due to vISA opt, convert them to src0 type which should be D/UD
25312516
// We use D as the tmp type to make sure we can represent all src1 values
2532-
auto isSrc1NonScalar = inst->getSrc(1)->isSrcRegRegion() && !inst->getSrc(1)->asSrcRegRegion()->isScalar();
2533-
auto newSrc = insertMovBefore(i, 1, Type_D, bb);
2534-
inst->setSrc(builder.createSrcRegRegion(Mod_src_undef, Direct, newSrc->getTopDcl()->getRegVar(), 0, 0,
2535-
isSrc1NonScalar ? builder.getRegionStride1() : builder.getRegionScalar(), src0->getType()), 1);
2517+
machSrc1 = insertMovBefore(i, 1, Type_D, bb);
25362518
}
25372519

2538-
//set implicit src/dst for mach
2539-
const RegionDesc* rd = exec_size > 1 ? builder.getRegionStride1() : builder.getRegionScalar();
2540-
G4_SrcRegRegion* acc_src_opnd = builder.createSrcRegRegion(Mod_src_undef, Direct, builder.phyregpool.getAcc0Reg(), 0, 0, rd, tmp_type);
2541-
inst->setImplAccSrc(acc_src_opnd);
2542-
inst->setImplAccDst(builder.createDstRegRegion(*acc_dst_opnd));
2520+
// We don't duplicate the operands here as original inst is unlinked
2521+
// ToDo: this invalidates the du-chain, do we still need to maintain it?
2522+
auto machInst = builder.createMach(inst->getExecSize(), inst->getDst(), inst->getSrc(0), machSrc1, inst_opt, tmp_type);
2523+
machInst->setPredicate(inst->getPredicate());
2524+
machInst->setCondMod(inst->getCondMod());
2525+
*i = machInst;
2526+
inst->transferUse(machInst);
2527+
inst->removeAllDefs();
2528+
newMul->addDefUse(machInst, Opnd_implAccSrc);
25432529

25442530
INST_LIST_ITER end_iter = i;
25452531
// check if the ACC source is aligned to mach dst
2532+
// ToDo: this should be checked by fixAcc?
25462533
G4_DstRegRegion* dst = inst->getDst();
2547-
if ((inst->getSaturate()) ||
2548-
(dst &&
2549-
((dst->getExecTypeSize() > G4_Type_Table[Type_D].byteSize) ||
2550-
(isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst)))))
2534+
if (inst->getSaturate() ||
2535+
dst->getExecTypeSize() > G4_Type_Table[Type_D].byteSize ||
2536+
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
25512537
{
25522538
// add a tmp mov
2553-
inst->setDest(insertMovAfter(i, dst, dst->getType(), bb));
2539+
machInst->setDest(insertMovAfter(i, dst, dst->getType(), bb));
25542540
end_iter++;
25552541
}
25562542

2557-
inst->setOptionOn(InstOpt_AccWrCtrl);
2558-
25592543
if (exec_size > builder.getNativeExecSize())
25602544
{
25612545
auto start_iter = std::prev(i);

0 commit comments

Comments
 (0)