Skip to content

Commit b13a1f8

Browse files
bcheng0127igcbot
authored andcommitted
gather send optimization
gather send optimization
1 parent e37fe2e commit b13a1f8

File tree

5 files changed

+243
-55
lines changed

5 files changed

+243
-55
lines changed

visa/G4_Declare.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ class G4_Declare {
9494
uint16_t forceSpilled : 1;
9595
uint16_t exclusiveLoad : 1;
9696
uint16_t isCmpUseOnly : 1;
97+
// Indicates whether the declare is only locally referenced.
98+
// Especially for the variable with pseudo_kill,
99+
// which will be removed in the removeLifetimeOps pass.
100+
uint16_t isBBLocal : 1;
97101

98102
unsigned declId; // global decl id for this builder
99103

@@ -334,6 +338,9 @@ class G4_Declare {
334338
void setIsCmpUseOnly(bool b) { isCmpUseOnly = b; }
335339
bool getIsCmpUseOnly() const { return isCmpUseOnly; }
336340

341+
// Stores b in the isBBLocal bit-field of this declare.
void setIsBBLocal(bool b) { isBBLocal = b; }
342+
// Returns the current value of the isBBLocal bit-field.
bool getIsBBLocal() const { return isBBLocal; }
343+
337344
unsigned getNumRegNeeded() const;
338345

339346
void emit(std::ostream &output) const;

visa/LocalDataflow.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,47 @@ static void processReadOpnds(G4_BB *BB, G4_INST *Inst, LocalLivenessInfo &LLI) {
453453
}
454454
}
455455

456+
// Clears the BB-local flag on every declare in pseudoKills that Inst uses
// through its indirect dst or through one of its src/pred/implAccSrc operands.
//   BB          - not used by this function.
//   Inst        - instruction to inspect; pseudo-kill instructions are ignored.
//   pseudoKills - declares to look for. Each one found in Inst gets
//                 setIsBBLocal(false) and is erased from the set, so it is
//                 handled at most once.
static void
457+
processReadOpndsForPseudoKill(G4_BB *BB, G4_INST *Inst,
458+
std::unordered_set<G4_Declare *> &pseudoKills) {
459+
// A pseudo kill itself is not treated as a use of the declare.
if (Inst->isPseudoKill()) {
460+
return;
461+
}
462+
// (1) Indirect dst operand reads address.
463+
G4_DstRegRegion *Dst = Inst->getDst();
464+
if (Dst && Dst->isIndirect()) {
465+
G4_Declare *dcl = Dst->getTopDcl();
466+
// Only declares still tracked in pseudoKills are updated.
if (pseudoKills.find(dcl) != pseudoKills.end()) {
467+
dcl->setIsBBLocal(false);
468+
pseudoKills.erase(dcl);
469+
}
470+
}
471+
472+
// (2) Direct and indirect source operands.
473+
// Visits src0..src7, the predicate and the implicit accumulator source.
for (auto OpNum :
474+
{Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
475+
Gen4_Operand_Number::Opnd_src2, Gen4_Operand_Number::Opnd_src3,
476+
Gen4_Operand_Number::Opnd_src4, Gen4_Operand_Number::Opnd_src5,
477+
Gen4_Operand_Number::Opnd_src6, Gen4_Operand_Number::Opnd_src7,
478+
Gen4_Operand_Number::Opnd_pred, Gen4_Operand_Number::Opnd_implAccSrc}) {
479+
G4_Operand *opnd = Inst->getOperand(OpNum);
480+
// Missing operands, immediates, null registers and labels are skipped.
if (opnd == nullptr || opnd->isImm() || opnd->isNullReg() ||
481+
opnd->isLabel())
482+
continue;
483+
484+
G4_Declare *dcl = nullptr;
485+
// For the pseudo address-mov intrinsic the declare is taken from the
// operand's address expression instead of getTopDcl().
if (Inst->isPseudoAddrMovIntrinsic()) {
486+
dcl =opnd->asAddrExp()->getRegVar()->getDeclare();
487+
} else {
488+
dcl = opnd->getTopDcl();
489+
}
490+
// Same handling as for the indirect dst above.
if (pseudoKills.find(dcl) != pseudoKills.end()) {
491+
dcl->setIsBBLocal(false);
492+
pseudoKills.erase(dcl);
493+
}
494+
}
495+
}
496+
456497
// Process writes. If this is a partial definition, then record this partial
457498
// definition. When all partial definitions together define this live read node,
458499
// it is killed and du/ud links are added.
@@ -487,6 +528,12 @@ static void processWriteOpnds(G4_BB *BB, G4_INST *Inst,
487528
}
488529

489530
void FlowGraph::localDataFlowAnalysis() {
531+
// For pseudo kill variable
532+
// If there is an exposed use in a BB, it's treated as global.
533+
// Otherwise, it's treated as local even if the same pseudo kill may appear in
534+
// multiple BBs
535+
std::unordered_set<G4_Declare *> pesudoKilledDcls;
536+
490537
for (auto BB : BBs) {
491538
LocalLivenessInfo LLI(!BB->isAllLaneActive());
492539
for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
@@ -504,7 +551,24 @@ void FlowGraph::localDataFlowAnalysis() {
504551
continue;
505552
}
506553
processWriteOpnds(BB, Inst, LLI);
554+
555+
if (Inst->isPseudoKill() && Inst->getDst() && !Inst->getDst()->isNullReg()) {
556+
G4_Declare *dcl = Inst->getDst()->getTopDcl();
557+
pesudoKilledDcls.insert(dcl);
558+
// In case the use in another BB is analyzed before define
559+
if (!globalOpndHT.isOpndGlobal(Inst->getDst())) {
560+
G4_Declare *dcl = Inst->getDst()->getTopDcl();
561+
dcl->setIsBBLocal(true);
562+
}
563+
}
564+
507565
processReadOpnds(BB, Inst, LLI);
566+
if (pesudoKilledDcls
567+
.size()) { // Process the operand using variable which
568+
// has pseudo kill. Since the scan is from back to
569+
// front, exposed use will make variable global
570+
processReadOpndsForPseudoKill(BB, Inst, pesudoKilledDcls);
571+
}
508572
}
509573

510574
// All left over live nodes are global.

visa/Optimizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -834,7 +834,7 @@ void Optimizer::s0SubAfterRA() {
834834
kernel.fg.resetLocalDataFlowData();
835835
kernel.fg.localDataFlowAnalysis();
836836

837-
SRSubPassBeforeRA s0Sub(builder, kernel);
837+
SRSubPassAfterRA s0Sub(builder, kernel);
838838
s0Sub.run();
839839
}
840840

0 commit comments

Comments
 (0)