Skip to content

Commit a3f667b

Browse files
authored
[AMDGPU][SIInsertWaitcnts] Remove redundant TII/TRI/MRI arguments (NFC) (#161357)
WaitcntBrackets already holds a pointer (Context) to its SIInsertWaitcnts instance. By making TII/TRI/MRI public members of SIInsertWaitcnts, WaitcntBrackets can read them directly through that pointer instead of receiving them as arguments. This simplifies many call sites, which makes the code easier to follow.
1 parent 5c50bdc commit a3f667b

File tree

1 file changed

+54
-67
lines changed

1 file changed

+54
-67
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 54 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -418,15 +418,14 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
418418
class SIInsertWaitcnts {
419419
public:
420420
const GCNSubtarget *ST;
421+
const SIInstrInfo *TII = nullptr;
422+
const SIRegisterInfo *TRI = nullptr;
423+
const MachineRegisterInfo *MRI = nullptr;
421424
InstCounterType SmemAccessCounter;
422425
InstCounterType MaxCounter;
423426
const unsigned *WaitEventMaskForInst;
424427

425428
private:
426-
const SIInstrInfo *TII = nullptr;
427-
const SIRegisterInfo *TRI = nullptr;
428-
const MachineRegisterInfo *MRI = nullptr;
429-
430429
DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
431430
DenseMap<MachineBasicBlock *, bool> PreheadersToFlush;
432431
MachineLoopInfo *MLI;
@@ -631,8 +630,6 @@ class WaitcntBrackets {
631630
bool merge(const WaitcntBrackets &Other);
632631

633632
RegInterval getRegInterval(const MachineInstr *MI,
634-
const MachineRegisterInfo *MRI,
635-
const SIRegisterInfo *TRI,
636633
const MachineOperand &Op) const;
637634

638635
bool counterOutOfOrder(InstCounterType T) const;
@@ -650,9 +647,7 @@ class WaitcntBrackets {
650647
void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
651648
void applyWaitcnt(InstCounterType T, unsigned Count);
652649
void applyXcnt(const AMDGPU::Waitcnt &Wait);
653-
void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
654-
const MachineRegisterInfo *MRI, WaitEventType E,
655-
MachineInstr &MI);
650+
void updateByEvent(WaitEventType E, MachineInstr &MI);
656651

657652
unsigned hasPendingEvent() const { return PendingEvents; }
658653
unsigned hasPendingEvent(WaitEventType E) const {
@@ -761,10 +756,8 @@ class WaitcntBrackets {
761756
void setScoreByInterval(RegInterval Interval, InstCounterType CntTy,
762757
unsigned Score);
763758

764-
void setScoreByOperand(const MachineInstr *MI, const SIRegisterInfo *TRI,
765-
const MachineRegisterInfo *MRI,
766-
const MachineOperand &Op, InstCounterType CntTy,
767-
unsigned Val);
759+
void setScoreByOperand(const MachineInstr *MI, const MachineOperand &Op,
760+
InstCounterType CntTy, unsigned Val);
768761

769762
const SIInsertWaitcnts *Context;
770763

@@ -821,12 +814,13 @@ class SIInsertWaitcntsLegacy : public MachineFunctionPass {
821814
} // end anonymous namespace
822815

823816
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
824-
const MachineRegisterInfo *MRI,
825-
const SIRegisterInfo *TRI,
826817
const MachineOperand &Op) const {
827818
if (Op.getReg() == AMDGPU::SCC)
828819
return {SCC, SCC + 1};
829820

821+
const SIRegisterInfo *TRI = Context->TRI;
822+
const MachineRegisterInfo *MRI = Context->MRI;
823+
830824
if (!TRI->isInAllocatableClass(Op.getReg()))
831825
return {-1, -1};
832826

@@ -891,11 +885,9 @@ void WaitcntBrackets::setScoreByInterval(RegInterval Interval,
891885
}
892886

893887
void WaitcntBrackets::setScoreByOperand(const MachineInstr *MI,
894-
const SIRegisterInfo *TRI,
895-
const MachineRegisterInfo *MRI,
896888
const MachineOperand &Op,
897889
InstCounterType CntTy, unsigned Score) {
898-
RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
890+
RegInterval Interval = getRegInterval(MI, Op);
899891
setScoreByInterval(Interval, CntTy, Score);
900892
}
901893

@@ -927,10 +919,7 @@ bool WaitcntBrackets::hasPointSamplePendingVmemTypes(
927919
return hasOtherPendingVmemTypes(Interval, VMEM_NOSAMPLER);
928920
}
929921

930-
void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
931-
const SIRegisterInfo *TRI,
932-
const MachineRegisterInfo *MRI,
933-
WaitEventType E, MachineInstr &Inst) {
922+
void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
934923
InstCounterType T = eventCounter(Context->WaitEventMaskForInst, E);
935924

936925
unsigned UB = getScoreUB(T);
@@ -943,66 +932,67 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
943932
PendingEvents |= 1 << E;
944933
setScoreUB(T, CurrScore);
945934

935+
const SIRegisterInfo *TRI = Context->TRI;
936+
const MachineRegisterInfo *MRI = Context->MRI;
937+
const SIInstrInfo *TII = Context->TII;
938+
946939
if (T == EXP_CNT) {
947940
// Put score on the source vgprs. If this is a store, just use those
948941
// specific register(s).
949942
if (TII->isDS(Inst) && Inst.mayLoadOrStore()) {
950943
// All GDS operations must protect their address register (same as
951944
// export.)
952945
if (const auto *AddrOp = TII->getNamedOperand(Inst, AMDGPU::OpName::addr))
953-
setScoreByOperand(&Inst, TRI, MRI, *AddrOp, EXP_CNT, CurrScore);
946+
setScoreByOperand(&Inst, *AddrOp, EXP_CNT, CurrScore);
954947

955948
if (Inst.mayStore()) {
956949
if (const auto *Data0 =
957950
TII->getNamedOperand(Inst, AMDGPU::OpName::data0))
958-
setScoreByOperand(&Inst, TRI, MRI, *Data0, EXP_CNT, CurrScore);
951+
setScoreByOperand(&Inst, *Data0, EXP_CNT, CurrScore);
959952
if (const auto *Data1 =
960953
TII->getNamedOperand(Inst, AMDGPU::OpName::data1))
961-
setScoreByOperand(&Inst, TRI, MRI, *Data1, EXP_CNT, CurrScore);
954+
setScoreByOperand(&Inst, *Data1, EXP_CNT, CurrScore);
962955
} else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
963956
Inst.getOpcode() != AMDGPU::DS_APPEND &&
964957
Inst.getOpcode() != AMDGPU::DS_CONSUME &&
965958
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
966959
for (const MachineOperand &Op : Inst.all_uses()) {
967960
if (TRI->isVectorRegister(*MRI, Op.getReg()))
968-
setScoreByOperand(&Inst, TRI, MRI, Op, EXP_CNT, CurrScore);
961+
setScoreByOperand(&Inst, Op, EXP_CNT, CurrScore);
969962
}
970963
}
971964
} else if (TII->isFLAT(Inst)) {
972965
if (Inst.mayStore()) {
973-
setScoreByOperand(&Inst, TRI, MRI,
966+
setScoreByOperand(&Inst,
974967
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
975968
EXP_CNT, CurrScore);
976969
} else if (SIInstrInfo::isAtomicRet(Inst)) {
977-
setScoreByOperand(&Inst, TRI, MRI,
970+
setScoreByOperand(&Inst,
978971
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
979972
EXP_CNT, CurrScore);
980973
}
981974
} else if (TII->isMIMG(Inst)) {
982975
if (Inst.mayStore()) {
983-
setScoreByOperand(&Inst, TRI, MRI, Inst.getOperand(0), EXP_CNT,
984-
CurrScore);
976+
setScoreByOperand(&Inst, Inst.getOperand(0), EXP_CNT, CurrScore);
985977
} else if (SIInstrInfo::isAtomicRet(Inst)) {
986-
setScoreByOperand(&Inst, TRI, MRI,
978+
setScoreByOperand(&Inst,
987979
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
988980
EXP_CNT, CurrScore);
989981
}
990982
} else if (TII->isMTBUF(Inst)) {
991983
if (Inst.mayStore())
992-
setScoreByOperand(&Inst, TRI, MRI, Inst.getOperand(0), EXP_CNT,
993-
CurrScore);
984+
setScoreByOperand(&Inst, Inst.getOperand(0), EXP_CNT, CurrScore);
994985
} else if (TII->isMUBUF(Inst)) {
995986
if (Inst.mayStore()) {
996-
setScoreByOperand(&Inst, TRI, MRI, Inst.getOperand(0), EXP_CNT,
997-
CurrScore);
987+
setScoreByOperand(&Inst, Inst.getOperand(0), EXP_CNT, CurrScore);
998988
} else if (SIInstrInfo::isAtomicRet(Inst)) {
999-
setScoreByOperand(&Inst, TRI, MRI,
989+
setScoreByOperand(&Inst,
1000990
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
1001991
EXP_CNT, CurrScore);
1002992
}
1003993
} else if (TII->isLDSDIR(Inst)) {
1004994
// LDSDIR instructions attach the score to the destination.
1005-
setScoreByOperand(&Inst, TRI, MRI,
995+
setScoreByOperand(&Inst,
1006996
*TII->getNamedOperand(Inst, AMDGPU::OpName::vdst),
1007997
EXP_CNT, CurrScore);
1008998
} else {
@@ -1013,18 +1003,18 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
10131003
// score.
10141004
for (MachineOperand &DefMO : Inst.all_defs()) {
10151005
if (TRI->isVGPR(*MRI, DefMO.getReg())) {
1016-
setScoreByOperand(&Inst, TRI, MRI, DefMO, EXP_CNT, CurrScore);
1006+
setScoreByOperand(&Inst, DefMO, EXP_CNT, CurrScore);
10171007
}
10181008
}
10191009
}
10201010
for (const MachineOperand &Op : Inst.all_uses()) {
10211011
if (TRI->isVectorRegister(*MRI, Op.getReg()))
1022-
setScoreByOperand(&Inst, TRI, MRI, Op, EXP_CNT, CurrScore);
1012+
setScoreByOperand(&Inst, Op, EXP_CNT, CurrScore);
10231013
}
10241014
}
10251015
} else if (T == X_CNT) {
10261016
for (const MachineOperand &Op : Inst.all_uses())
1027-
setScoreByOperand(&Inst, TRI, MRI, Op, T, CurrScore);
1017+
setScoreByOperand(&Inst, Op, T, CurrScore);
10281018
} else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
10291019
// Match the score to the destination registers.
10301020
//
@@ -1036,7 +1026,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
10361026
// Special cases where implicit register defs exists, such as M0 or VCC,
10371027
// but none with memory instructions.
10381028
for (const MachineOperand &Op : Inst.defs()) {
1039-
RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op);
1029+
RegInterval Interval = getRegInterval(&Inst, Op);
10401030
if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
10411031
if (Interval.first >= NUM_ALL_VGPRS)
10421032
continue;
@@ -1928,15 +1918,15 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
19281918
const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
19291919
if (CallAddrOp.isReg()) {
19301920
RegInterval CallAddrOpInterval =
1931-
ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOp);
1921+
ScoreBrackets.getRegInterval(&MI, CallAddrOp);
19321922

19331923
ScoreBrackets.determineWait(SmemAccessCounter, CallAddrOpInterval,
19341924
Wait);
19351925

19361926
if (const auto *RtnAddrOp =
19371927
TII->getNamedOperand(MI, AMDGPU::OpName::dst)) {
19381928
RegInterval RtnAddrOpInterval =
1939-
ScoreBrackets.getRegInterval(&MI, MRI, TRI, *RtnAddrOp);
1929+
ScoreBrackets.getRegInterval(&MI, *RtnAddrOp);
19401930

19411931
ScoreBrackets.determineWait(SmemAccessCounter, RtnAddrOpInterval,
19421932
Wait);
@@ -2000,7 +1990,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
20001990
if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
20011991
continue;
20021992

2003-
RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, Op);
1993+
RegInterval Interval = ScoreBrackets.getRegInterval(&MI, Op);
20041994

20051995
const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
20061996
if (IsVGPR) {
@@ -2237,16 +2227,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22372227
if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
22382228
if (TII->isAlwaysGDS(Inst.getOpcode()) ||
22392229
TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
2240-
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
2241-
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
2230+
ScoreBrackets->updateByEvent(GDS_ACCESS, Inst);
2231+
ScoreBrackets->updateByEvent(GDS_GPR_LOCK, Inst);
22422232
ScoreBrackets->setPendingGDS();
22432233
} else {
2244-
ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
2234+
ScoreBrackets->updateByEvent(LDS_ACCESS, Inst);
22452235
}
22462236
} else if (TII->isFLAT(Inst)) {
22472237
if (SIInstrInfo::isGFX12CacheInvOrWBInst(Inst.getOpcode())) {
2248-
ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
2249-
Inst);
2238+
ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
22502239
return;
22512240
}
22522241

@@ -2257,13 +2246,12 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22572246
if (TII->mayAccessVMEMThroughFlat(Inst)) {
22582247
++FlatASCount;
22592248
IsVMEMAccess = true;
2260-
ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
2261-
Inst);
2249+
ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
22622250
}
22632251

22642252
if (TII->mayAccessLDSThroughFlat(Inst)) {
22652253
++FlatASCount;
2266-
ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
2254+
ScoreBrackets->updateByEvent(LDS_ACCESS, Inst);
22672255
}
22682256

22692257
// This is a flat memory operation that access both VMEM and LDS, so note it
@@ -2274,16 +2262,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22742262
} else if (SIInstrInfo::isVMEM(Inst) &&
22752263
!llvm::AMDGPU::getMUBUFIsBufferInv(Inst.getOpcode())) {
22762264
IsVMEMAccess = true;
2277-
ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
2278-
Inst);
2265+
ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
22792266

22802267
if (ST->vmemWriteNeedsExpWaitcnt() &&
22812268
(Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst))) {
2282-
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
2269+
ScoreBrackets->updateByEvent(VMW_GPR_LOCK, Inst);
22832270
}
22842271
} else if (TII->isSMRD(Inst)) {
22852272
IsSMEMAccess = true;
2286-
ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
2273+
ScoreBrackets->updateByEvent(SMEM_ACCESS, Inst);
22872274
} else if (Inst.isCall()) {
22882275
if (callWaitsOnFunctionReturn(Inst)) {
22892276
// Act as a wait on everything
@@ -2295,33 +2282,33 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22952282
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
22962283
}
22972284
} else if (SIInstrInfo::isLDSDIR(Inst)) {
2298-
ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_LDS_ACCESS, Inst);
2285+
ScoreBrackets->updateByEvent(EXP_LDS_ACCESS, Inst);
22992286
} else if (TII->isVINTERP(Inst)) {
23002287
int64_t Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::waitexp)->getImm();
23012288
ScoreBrackets->applyWaitcnt(EXP_CNT, Imm);
23022289
} else if (SIInstrInfo::isEXP(Inst)) {
23032290
unsigned Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
23042291
if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
2305-
ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
2292+
ScoreBrackets->updateByEvent(EXP_PARAM_ACCESS, Inst);
23062293
else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
2307-
ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
2294+
ScoreBrackets->updateByEvent(EXP_POS_ACCESS, Inst);
23082295
else
2309-
ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
2296+
ScoreBrackets->updateByEvent(EXP_GPR_LOCK, Inst);
23102297
} else if (SIInstrInfo::isSBarrierSCCWrite(Inst.getOpcode())) {
2311-
ScoreBrackets->updateByEvent(TII, TRI, MRI, SCC_WRITE, Inst);
2298+
ScoreBrackets->updateByEvent(SCC_WRITE, Inst);
23122299
} else {
23132300
switch (Inst.getOpcode()) {
23142301
case AMDGPU::S_SENDMSG:
23152302
case AMDGPU::S_SENDMSG_RTN_B32:
23162303
case AMDGPU::S_SENDMSG_RTN_B64:
23172304
case AMDGPU::S_SENDMSGHALT:
2318-
ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst);
2305+
ScoreBrackets->updateByEvent(SQ_MESSAGE, Inst);
23192306
break;
23202307
case AMDGPU::S_MEMTIME:
23212308
case AMDGPU::S_MEMREALTIME:
23222309
case AMDGPU::S_GET_BARRIER_STATE_M0:
23232310
case AMDGPU::S_GET_BARRIER_STATE_IMM:
2324-
ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
2311+
ScoreBrackets->updateByEvent(SMEM_ACCESS, Inst);
23252312
break;
23262313
}
23272314
}
@@ -2330,10 +2317,10 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
23302317
return;
23312318

23322319
if (IsVMEMAccess)
2333-
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_GROUP, Inst);
2320+
ScoreBrackets->updateByEvent(VMEM_GROUP, Inst);
23342321

23352322
if (IsSMEMAccess)
2336-
ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_GROUP, Inst);
2323+
ScoreBrackets->updateByEvent(SMEM_GROUP, Inst);
23372324
}
23382325

23392326
bool WaitcntBrackets::mergeScore(const MergeInfo &M, unsigned &Score,
@@ -2637,7 +2624,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
26372624
for (const MachineOperand &Op : MI.all_uses()) {
26382625
if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg()))
26392626
continue;
2640-
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
2627+
RegInterval Interval = Brackets.getRegInterval(&MI, Op);
26412628
// Vgpr use
26422629
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
26432630
// If we find a register that is loaded inside the loop, 1. and 2.
@@ -2662,7 +2649,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
26622649
// VMem load vgpr def
26632650
if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
26642651
for (const MachineOperand &Op : MI.all_defs()) {
2665-
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
2652+
RegInterval Interval = Brackets.getRegInterval(&MI, Op);
26662653
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
26672654
// If we find a register that is loaded inside the loop, 1. and 2.
26682655
// are invalidated and we can exit.

0 commit comments

Comments
 (0)