Skip to content

Commit c2c0fe9

Browse files
bcheng0127 authored and igcbot committed
SWSB: track WAR A0 register dependence
The current solution is too conservative and may hurt performance because it sets A@1.
1 parent 0db8a75 commit c2c0fe9

File tree

3 files changed

+87
-54
lines changed

3 files changed

+87
-54
lines changed

visa/HWCaps.inc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -464,9 +464,13 @@ uint32_t getNumAddrRegisters() const {
464464
return 16;
465465
}
466466

467-
uint32_t getGRFNumOfAddrRegisters() const {
468-
// The element size of address register is 16 bits
469-
return ((getNumAddrRegisters() * 2 - 1) / numEltPerGRF<Type_UB>() + 1);
467+
// each address register is 16 bits
468+
uint32_t getNumAddrRegistersInGRFSizeSWSB() const {
469+
if (hasThreeALUPipes() || hasFourALUPipes()) {
470+
return ((16 * G4_WSIZE) + numEltPerGRF<Type_UB>() - 1) / numEltPerGRF<Type_UB>();
471+
} else {
472+
return 0;
473+
}
470474
}
471475

472476
uint32_t getNumScalarRegisters(void) {
@@ -735,7 +739,7 @@ bool hasWriteCombine() const {
735739
}
736740

737741
bool hasA0WARHWissue() {
738-
return getPlatform() >= Xe_XeHPSDV;
742+
return getPlatform() >= Xe_XeHPSDV && getPlatform() < Xe2;
739743
}
740744

741745
bool hasFtoPackedHFMove() const { return getPlatform() >= Xe_DG2; }
@@ -884,8 +888,8 @@ bool supports4GRFAlign() const {
884888
return false;
885889
}
886890

887-
bool needA0WARForSend() const {
888-
return false;
891+
bool needA0WAR() const {
892+
return (getPlatform() >= Xe2);
889893
}
890894

891895
bool alwaysAllowGlobalFlagOpt() const {

visa/LocalScheduler/SWSB_G4IR.cpp

Lines changed: 72 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,8 @@ void SBNode::finalizeDistanceType1(IR_Builder &builder,
550550
return;
551551
}
552552

553-
if (builder.hasA0WARHWissue() && (builder.hasThreeALUPipes() || builder.hasFourALUPipes())) {
553+
if (builder.hasA0WARHWissue() &&
554+
(builder.hasThreeALUPipes() || builder.hasFourALUPipes())) {
554555
G4_INST *inst = GetInstruction();
555556

556557
if (inst->getDst() && inst->getDst()->isDirectA0()) {
@@ -560,7 +561,6 @@ void SBNode::finalizeDistanceType1(IR_Builder &builder,
560561
return;
561562
}
562563
}
563-
564564
unsigned curDistance = (unsigned)instVec.front()->getDistance();
565565
if (!distDep.empty()) {
566566
SB_INST_PIPE depPipe = PIPE_NONE;
@@ -693,7 +693,8 @@ void SBNode::finalizeDistanceType2(IR_Builder &builder,
693693
return;
694694
}
695695

696-
if (builder.hasA0WARHWissue() && (builder.hasThreeALUPipes() || builder.hasFourALUPipes())) {
696+
if (builder.hasA0WARHWissue() &&
697+
(builder.hasThreeALUPipes() || builder.hasFourALUPipes())) {
697698
G4_INST *inst = GetInstruction();
698699

699700
if (inst->getDst() && inst->getDst()->isDirectA0()) {
@@ -851,7 +852,8 @@ void SBNode::finalizeDistanceType3(IR_Builder &builder,
851852
return;
852853
}
853854

854-
if (builder.hasA0WARHWissue() && (builder.hasThreeALUPipes() || builder.hasFourALUPipes())) {
855+
if (builder.hasA0WARHWissue() &&
856+
(builder.hasThreeALUPipes() || builder.hasFourALUPipes())) {
855857
G4_INST *inst = GetInstruction();
856858

857859
if (inst->getDst() && inst->getDst()->isDirectA0()) {
@@ -1232,18 +1234,33 @@ SBFootprint *G4_BB_SB::getFootprintForFlag(G4_Operand *opnd,
12321234
SBFootprint *G4_BB_SB::getFootprintForA0(G4_Operand *opnd,
12331235
Gen4_Operand_Number opnd_num,
12341236
G4_INST *inst) {
1237+
bool valid = true;
1238+
unsigned subRegNum = 0;
1239+
if (opnd->isSrcRegRegion()) {
1240+
G4_SrcRegRegion *srcRegRegion = opnd->asSrcRegRegion();
1241+
if (srcRegRegion->getRegAccess() == Direct) {
1242+
subRegNum = srcRegRegion->ExSubRegNum(valid);
1243+
} else {
1244+
subRegNum = srcRegRegion->ExIndSubRegNum(valid);
1245+
}
1246+
} else if (opnd->isDstRegRegion()) {
1247+
G4_DstRegRegion *dstRegRegion = opnd->asDstRegRegion();
1248+
if (dstRegRegion->getRegAccess() == Direct) {
1249+
subRegNum = dstRegRegion->ExSubRegNum(valid);
1250+
} else {
1251+
subRegNum = dstRegRegion->ExIndSubRegNum(valid);
1252+
}
1253+
} else {
1254+
vISA_ASSERT_UNREACHABLE("invalid A0 operand");
1255+
}
1256+
12351257
unsigned short LB = 0;
12361258
unsigned short RB = 0;
12371259
G4_Type type = opnd->getType();
1260+
G4_Type addrType = opnd->isIndirect() ? ADDR_REG_TYPE : opnd->getType();
1261+
LB = subRegNum * TypeSize(addrType);
1262+
RB = opnd->getRightBound() - opnd->getLeftBound() + LB;
12381263

1239-
bool valid = true;
1240-
unsigned subRegOff = opnd->getBase()->ExSubRegNum(valid);
1241-
G4_Type addrType = opnd->isIndirect() ? Type_UW : opnd->getType();
1242-
1243-
LB = (unsigned short)(subRegOff * TypeSize(addrType));
1244-
RB = (unsigned short)(LB + opnd->getRightBound() - opnd->getLeftBound());
1245-
1246-
// Updated to the bucket footprint
12471264
LB += (builder.kernel.getNumRegTotal() + builder.getNumScalarRegisters()) *
12481265
builder.numEltPerGRF<Type_UB>();
12491266
RB += (builder.kernel.getNumRegTotal() + builder.getNumScalarRegisters()) *
@@ -4627,8 +4644,7 @@ void SWSB::insertTokenSync() {
46274644
syncInst->setDistance(1);
46284645
if (kernel.fg.builder->hasThreeALUPipes() ||
46294646
kernel.fg.builder->hasFourALUPipes()) {
4630-
syncInst->setDistanceTypeXe(
4631-
G4_INST::DistanceType::DISTALL);
4647+
syncInst->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
46324648
}
46334649
}
46344650
}
@@ -5664,9 +5680,15 @@ bool G4_BB_SB::getFootprintForOperand(SBNode *node, G4_INST *inst,
56645680
}
56655681
}
56665682

5667-
if (builder.needA0WARForSend() && isA0) {
5683+
if (builder.needA0WAR() && isA0) {
56685684
footprint = getFootprintForA0(opnd, opndNum, inst);
5669-
node->setFootprint(footprint, opndNum);
5685+
if (opndNum == Opnd_dst && opnd->asDstRegRegion()->isIndirect()) {
5686+
// Indirect will only be used in the src0~src2, using Opnd_src4 as the
5687+
// indirect used in dst
5688+
node->setFootprint(footprint, Opnd_src4);
5689+
} else {
5690+
node->setFootprint(footprint, opndNum);
5691+
}
56705692
}
56715693

56725694

@@ -5748,7 +5770,8 @@ void G4_BB_SB::getGRFBuckets(const SBFootprint *footprint,
57485770
std::vector<SBBucketDesc> &BDvec, bool GRFOnly) {
57495771
for (const SBFootprint *curFootprint = footprint; curFootprint != nullptr;
57505772
curFootprint = curFootprint->next) {
5751-
if (GRFOnly && (curFootprint->fType != GRF_T)) {
5773+
if (GRFOnly && (curFootprint->fType != GRF_T) &&
5774+
(curFootprint->fType != A0_T)) {
57525775
continue;
57535776
}
57545777

@@ -5797,7 +5820,8 @@ void G4_BB_SB::getGRFBucketsForOperands(SBNode *node,
57975820
for (Gen4_Operand_Number opndNum = first_opnd; opndNum <= last_opnd;
57985821
opndNum = (Gen4_Operand_Number)(opndNum + 1)) {
57995822
const SBFootprint *footprint = node->getFirstFootprint(opndNum);
5800-
if (!footprint || (GRFOnly && (footprint->fType != GRF_T))) {
5823+
if (!footprint || (GRFOnly && (footprint->fType != GRF_T) &&
5824+
(footprint->fType != A0_T))) {
58015825
continue;
58025826
}
58035827
getGRFBuckets(footprint, opndNum, BDvec, GRFOnly);
@@ -6074,21 +6098,14 @@ void G4_BB_SB::setDistance(const SBFootprint *footprint, SBNode *node,
60746098
}
60756099

60766100
void G4_BB_SB::setSpecialDistance(SBNode *node) {
6077-
G4_INST *inst = node->GetInstruction();
6078-
if (!inst->getDst()) {
6079-
return;
6080-
}
6081-
6082-
if (inst->getDst()->isDirectA0()) {
6083-
SBDISTDEP_ITEM depItem;
6084-
depItem.liveNodePipe = PIPE_FLOAT;
6085-
depItem.nodePipe = node->ALUPipe;
6086-
depItem.operandType = PIPE_INT;
6087-
depItem.dstDep = false;
6088-
node->setDistance(1);
6089-
node->distDep.push_back(depItem);
6090-
node->setDistInfo(PIPE_FLOAT, 1);
6091-
}
6101+
SBDISTDEP_ITEM depItem;
6102+
depItem.liveNodePipe = PIPE_FLOAT;
6103+
depItem.nodePipe = node->ALUPipe;
6104+
depItem.operandType = PIPE_INT;
6105+
depItem.dstDep = false;
6106+
node->setDistance(1);
6107+
node->distDep.push_back(depItem);
6108+
node->setDistInfo(PIPE_FLOAT, 1);
60926109

60936110
return;
60946111
}
@@ -6577,8 +6594,17 @@ void G4_BB_SB::SBDDD(G4_BB *bb, LiveGRFBuckets *&LB,
65776594

65786595
if (builder.hasA0WARHWissue() &&
65796596
(builder.hasThreeALUPipes() || builder.hasFourALUPipes())) {
6580-
setSpecialDistance(node);
6597+
if (curInst->getDst() && curInst->getDst()->isDirectA0()) {
6598+
setSpecialDistance(node);
6599+
}
6600+
} else if (builder.needA0WAR()) {
6601+
if (!indexes->setFirstA0 && curInst->getDst() &&
6602+
curInst->getDst()->isDirectA0()) {
6603+
indexes->setFirstA0 = 1;
6604+
setSpecialDistance(node);
6605+
}
65816606
}
6607+
65826608
// Record the node IDs of the instructions in BB
65836609
if (first_node == INVALID_ID) {
65846610
first_node = nodeID;
@@ -6981,13 +7007,16 @@ void G4_BB_SB::SBDDD(G4_BB *bb, LiveGRFBuckets *&LB,
69817007
continue;
69827008
}
69837009

6984-
if (builder.needA0WARForSend() &&
6985-
curBucket == builder.kernel.getNumRegTotal() +
6986-
builder.getNumScalarRegisters()) {
6987-
if (!tokenHonourInstruction(liveInst) || dep != WAR ||
6988-
hasSameFunctionID(liveInst, curInst)) {
6989-
++bn_it;
6990-
continue;
7010+
if (builder.needA0WAR()) {
7011+
const int A0_start =
7012+
builder.kernel.getNumRegTotal() + builder.getNumScalarRegisters();
7013+
const int A0_end =
7014+
A0_start + builder.getNumAddrRegistersInGRFSizeSWSB() - 1;
7015+
if (curBucket >= A0_start && curBucket <= A0_end) {
7016+
if (dep != WAR) {
7017+
++bn_it;
7018+
continue;
7019+
}
69917020
}
69927021
}
69937022

@@ -7100,7 +7129,7 @@ void G4_BB_SB::SBDDD(G4_BB *bb, LiveGRFBuckets *&LB,
71007129
if (distanceHonourInstruction(liveInst)) {
71017130
if (dep == RAW &&
71027131
(curBucket < globalRegisterNum)) { // Only need track GRF
7103-
// RAW dependence
7132+
// RAW dependence
71047133
LB->killOperand(bn_it);
71057134
setDistance(curFootprint, node, liveNode, false);
71067135
liveNode->setInstKilled(true); // Instruction level kill
@@ -7769,7 +7798,7 @@ void G4_BB_SB::getLiveBucketsFromFootprint(
77697798
for (const SBFootprint *footprint = firstFootprint; footprint != nullptr;
77707799
footprint = footprint->next) {
77717800
// We only track the global dependence for GRF
7772-
if (footprint->fType != GRF_T) {
7801+
if ((footprint->fType != GRF_T) && (footprint->fType != A0_T)) {
77737802
continue;
77747803
}
77757804

visa/LocalScheduler/SWSB_G4IR.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ typedef std::list<G4_BB_SB *> BB_SWSB_LIST;
599599
typedef BB_SWSB_LIST::iterator BB_SWSB_LIST_ITER;
600600

601601
typedef struct _SWSB_INDEXES {
602+
int setFirstA0 = 0;
602603
int instIndex = 0;
603604
int ALUIndex = 0;
604605
int integerIndex = 0;
@@ -661,7 +662,6 @@ class G4_BB_SB {
661662

662663
int send_start = -1;
663664
int send_end = -1;
664-
665665
unsigned loopStartBBID = -1; // The start BB ID of live range
666666
unsigned loopEndBBID = -1; // The end BB ID of live range
667667

@@ -712,8 +712,8 @@ class G4_BB_SB {
712712
last_send_node = -1;
713713
totalGRFNum = builder.kernel.getNumRegTotal();
714714
globalRegisterNum = totalGRFNum + builder.getNumScalarRegisters();
715-
if (builder.needA0WARForSend()) {
716-
globalRegisterNum += builder.getGRFNumOfAddrRegisters();
715+
if (builder.needA0WAR()) {
716+
globalRegisterNum += builder.getNumAddrRegistersInGRFSizeSWSB();
717717
}
718718

719719
SBDDD(bb, lb, globalLB, GRFAlignedGlobalSendsLB, SBNodes, SBSendNodes,
@@ -1152,8 +1152,8 @@ class SWSB {
11521152
{
11531153
globalRegisterNum =
11541154
kernel.getNumRegTotal() + k.fg.builder->getNumScalarRegisters();
1155-
if (k.fg.builder->needA0WARForSend()) {
1156-
globalRegisterNum += k.fg.builder->getGRFNumOfAddrRegisters();
1155+
if (k.fg.builder->needA0WAR()) {
1156+
globalRegisterNum += k.fg.builder->getNumAddrRegistersInGRFSizeSWSB();
11571157
}
11581158

11591159
indexes.instIndex = 0;

0 commit comments

Comments
 (0)