@@ -418,15 +418,14 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
418418class SIInsertWaitcnts {
419419public:
420420 const GCNSubtarget *ST;
421+ const SIInstrInfo *TII = nullptr ;
422+ const SIRegisterInfo *TRI = nullptr ;
423+ const MachineRegisterInfo *MRI = nullptr ;
421424 InstCounterType SmemAccessCounter;
422425 InstCounterType MaxCounter;
423426 const unsigned *WaitEventMaskForInst;
424427
425428private:
426- const SIInstrInfo *TII = nullptr ;
427- const SIRegisterInfo *TRI = nullptr ;
428- const MachineRegisterInfo *MRI = nullptr ;
429-
430429 DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
431430 DenseMap<MachineBasicBlock *, bool > PreheadersToFlush;
432431 MachineLoopInfo *MLI;
@@ -631,8 +630,6 @@ class WaitcntBrackets {
631630 bool merge (const WaitcntBrackets &Other);
632631
633632 RegInterval getRegInterval (const MachineInstr *MI,
634- const MachineRegisterInfo *MRI,
635- const SIRegisterInfo *TRI,
636633 const MachineOperand &Op) const ;
637634
638635 bool counterOutOfOrder (InstCounterType T) const ;
@@ -650,9 +647,7 @@ class WaitcntBrackets {
650647 void applyWaitcnt (const AMDGPU::Waitcnt &Wait);
651648 void applyWaitcnt (InstCounterType T, unsigned Count);
652649 void applyXcnt (const AMDGPU::Waitcnt &Wait);
653- void updateByEvent (const SIInstrInfo *TII, const SIRegisterInfo *TRI,
654- const MachineRegisterInfo *MRI, WaitEventType E,
655- MachineInstr &MI);
650+ void updateByEvent (WaitEventType E, MachineInstr &MI);
656651
657652 unsigned hasPendingEvent () const { return PendingEvents; }
658653 unsigned hasPendingEvent (WaitEventType E) const {
@@ -761,10 +756,8 @@ class WaitcntBrackets {
761756 void setScoreByInterval (RegInterval Interval, InstCounterType CntTy,
762757 unsigned Score);
763758
764- void setScoreByOperand (const MachineInstr *MI, const SIRegisterInfo *TRI,
765- const MachineRegisterInfo *MRI,
766- const MachineOperand &Op, InstCounterType CntTy,
767- unsigned Val);
759+ void setScoreByOperand (const MachineInstr *MI, const MachineOperand &Op,
760+ InstCounterType CntTy, unsigned Val);
768761
769762 const SIInsertWaitcnts *Context;
770763
@@ -821,12 +814,13 @@ class SIInsertWaitcntsLegacy : public MachineFunctionPass {
821814} // end anonymous namespace
822815
823816RegInterval WaitcntBrackets::getRegInterval (const MachineInstr *MI,
824- const MachineRegisterInfo *MRI,
825- const SIRegisterInfo *TRI,
826817 const MachineOperand &Op) const {
827818 if (Op.getReg () == AMDGPU::SCC)
828819 return {SCC, SCC + 1 };
829820
821+ const SIRegisterInfo *TRI = Context->TRI ;
822+ const MachineRegisterInfo *MRI = Context->MRI ;
823+
830824 if (!TRI->isInAllocatableClass (Op.getReg ()))
831825 return {-1 , -1 };
832826
@@ -891,11 +885,9 @@ void WaitcntBrackets::setScoreByInterval(RegInterval Interval,
891885}
892886
893887void WaitcntBrackets::setScoreByOperand (const MachineInstr *MI,
894- const SIRegisterInfo *TRI,
895- const MachineRegisterInfo *MRI,
896888 const MachineOperand &Op,
897889 InstCounterType CntTy, unsigned Score) {
898- RegInterval Interval = getRegInterval (MI, MRI, TRI, Op);
890+ RegInterval Interval = getRegInterval (MI, Op);
899891 setScoreByInterval (Interval, CntTy, Score);
900892}
901893
@@ -927,10 +919,7 @@ bool WaitcntBrackets::hasPointSamplePendingVmemTypes(
927919 return hasOtherPendingVmemTypes (Interval, VMEM_NOSAMPLER);
928920}
929921
930- void WaitcntBrackets::updateByEvent (const SIInstrInfo *TII,
931- const SIRegisterInfo *TRI,
932- const MachineRegisterInfo *MRI,
933- WaitEventType E, MachineInstr &Inst) {
922+ void WaitcntBrackets::updateByEvent (WaitEventType E, MachineInstr &Inst) {
934923 InstCounterType T = eventCounter (Context->WaitEventMaskForInst , E);
935924
936925 unsigned UB = getScoreUB (T);
@@ -943,66 +932,67 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
943932 PendingEvents |= 1 << E;
944933 setScoreUB (T, CurrScore);
945934
935+ const SIRegisterInfo *TRI = Context->TRI ;
936+ const MachineRegisterInfo *MRI = Context->MRI ;
937+ const SIInstrInfo *TII = Context->TII ;
938+
946939 if (T == EXP_CNT) {
947940 // Put score on the source vgprs. If this is a store, just use those
948941 // specific register(s).
949942 if (TII->isDS (Inst) && Inst.mayLoadOrStore ()) {
950943 // All GDS operations must protect their address register (same as
951944 // export.)
952945 if (const auto *AddrOp = TII->getNamedOperand (Inst, AMDGPU::OpName::addr))
953- setScoreByOperand (&Inst, TRI, MRI, *AddrOp, EXP_CNT, CurrScore);
946+ setScoreByOperand (&Inst, *AddrOp, EXP_CNT, CurrScore);
954947
955948 if (Inst.mayStore ()) {
956949 if (const auto *Data0 =
957950 TII->getNamedOperand (Inst, AMDGPU::OpName::data0))
958- setScoreByOperand (&Inst, TRI, MRI, *Data0, EXP_CNT, CurrScore);
951+ setScoreByOperand (&Inst, *Data0, EXP_CNT, CurrScore);
959952 if (const auto *Data1 =
960953 TII->getNamedOperand (Inst, AMDGPU::OpName::data1))
961- setScoreByOperand (&Inst, TRI, MRI, *Data1, EXP_CNT, CurrScore);
954+ setScoreByOperand (&Inst, *Data1, EXP_CNT, CurrScore);
962955 } else if (SIInstrInfo::isAtomicRet (Inst) && !SIInstrInfo::isGWS (Inst) &&
963956 Inst.getOpcode () != AMDGPU::DS_APPEND &&
964957 Inst.getOpcode () != AMDGPU::DS_CONSUME &&
965958 Inst.getOpcode () != AMDGPU::DS_ORDERED_COUNT) {
966959 for (const MachineOperand &Op : Inst.all_uses ()) {
967960 if (TRI->isVectorRegister (*MRI, Op.getReg ()))
968- setScoreByOperand (&Inst, TRI, MRI, Op, EXP_CNT, CurrScore);
961+ setScoreByOperand (&Inst, Op, EXP_CNT, CurrScore);
969962 }
970963 }
971964 } else if (TII->isFLAT (Inst)) {
972965 if (Inst.mayStore ()) {
973- setScoreByOperand (&Inst, TRI, MRI,
966+ setScoreByOperand (&Inst,
974967 *TII->getNamedOperand (Inst, AMDGPU::OpName::data),
975968 EXP_CNT, CurrScore);
976969 } else if (SIInstrInfo::isAtomicRet (Inst)) {
977- setScoreByOperand (&Inst, TRI, MRI,
970+ setScoreByOperand (&Inst,
978971 *TII->getNamedOperand (Inst, AMDGPU::OpName::data),
979972 EXP_CNT, CurrScore);
980973 }
981974 } else if (TII->isMIMG (Inst)) {
982975 if (Inst.mayStore ()) {
983- setScoreByOperand (&Inst, TRI, MRI, Inst.getOperand (0 ), EXP_CNT,
984- CurrScore);
976+ setScoreByOperand (&Inst, Inst.getOperand (0 ), EXP_CNT, CurrScore);
985977 } else if (SIInstrInfo::isAtomicRet (Inst)) {
986- setScoreByOperand (&Inst, TRI, MRI,
978+ setScoreByOperand (&Inst,
987979 *TII->getNamedOperand (Inst, AMDGPU::OpName::data),
988980 EXP_CNT, CurrScore);
989981 }
990982 } else if (TII->isMTBUF (Inst)) {
991983 if (Inst.mayStore ())
992- setScoreByOperand (&Inst, TRI, MRI, Inst.getOperand (0 ), EXP_CNT,
993- CurrScore);
984+ setScoreByOperand (&Inst, Inst.getOperand (0 ), EXP_CNT, CurrScore);
994985 } else if (TII->isMUBUF (Inst)) {
995986 if (Inst.mayStore ()) {
996- setScoreByOperand (&Inst, TRI, MRI, Inst.getOperand (0 ), EXP_CNT,
997- CurrScore);
987+ setScoreByOperand (&Inst, Inst.getOperand (0 ), EXP_CNT, CurrScore);
998988 } else if (SIInstrInfo::isAtomicRet (Inst)) {
999- setScoreByOperand (&Inst, TRI, MRI,
989+ setScoreByOperand (&Inst,
1000990 *TII->getNamedOperand (Inst, AMDGPU::OpName::data),
1001991 EXP_CNT, CurrScore);
1002992 }
1003993 } else if (TII->isLDSDIR (Inst)) {
1004994 // LDSDIR instructions attach the score to the destination.
1005- setScoreByOperand (&Inst, TRI, MRI,
995+ setScoreByOperand (&Inst,
1006996 *TII->getNamedOperand (Inst, AMDGPU::OpName::vdst),
1007997 EXP_CNT, CurrScore);
1008998 } else {
@@ -1013,18 +1003,18 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
10131003 // score.
10141004 for (MachineOperand &DefMO : Inst.all_defs ()) {
10151005 if (TRI->isVGPR (*MRI, DefMO.getReg ())) {
1016- setScoreByOperand (&Inst, TRI, MRI, DefMO, EXP_CNT, CurrScore);
1006+ setScoreByOperand (&Inst, DefMO, EXP_CNT, CurrScore);
10171007 }
10181008 }
10191009 }
10201010 for (const MachineOperand &Op : Inst.all_uses ()) {
10211011 if (TRI->isVectorRegister (*MRI, Op.getReg ()))
1022- setScoreByOperand (&Inst, TRI, MRI, Op, EXP_CNT, CurrScore);
1012+ setScoreByOperand (&Inst, Op, EXP_CNT, CurrScore);
10231013 }
10241014 }
10251015 } else if (T == X_CNT) {
10261016 for (const MachineOperand &Op : Inst.all_uses ())
1027- setScoreByOperand (&Inst, TRI, MRI, Op, T, CurrScore);
1017+ setScoreByOperand (&Inst, Op, T, CurrScore);
10281018 } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
10291019 // Match the score to the destination registers.
10301020 //
@@ -1036,7 +1026,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
10361026 // Special cases where implicit register defs exists, such as M0 or VCC,
10371027 // but none with memory instructions.
10381028 for (const MachineOperand &Op : Inst.defs ()) {
1039- RegInterval Interval = getRegInterval (&Inst, MRI, TRI, Op);
1029+ RegInterval Interval = getRegInterval (&Inst, Op);
10401030 if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
10411031 if (Interval.first >= NUM_ALL_VGPRS)
10421032 continue ;
@@ -1928,15 +1918,15 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
19281918 const auto &CallAddrOp = *TII->getNamedOperand (MI, AMDGPU::OpName::src0);
19291919 if (CallAddrOp.isReg ()) {
19301920 RegInterval CallAddrOpInterval =
1931- ScoreBrackets.getRegInterval (&MI, MRI, TRI, CallAddrOp);
1921+ ScoreBrackets.getRegInterval (&MI, CallAddrOp);
19321922
19331923 ScoreBrackets.determineWait (SmemAccessCounter, CallAddrOpInterval,
19341924 Wait);
19351925
19361926 if (const auto *RtnAddrOp =
19371927 TII->getNamedOperand (MI, AMDGPU::OpName::dst)) {
19381928 RegInterval RtnAddrOpInterval =
1939- ScoreBrackets.getRegInterval (&MI, MRI, TRI, *RtnAddrOp);
1929+ ScoreBrackets.getRegInterval (&MI, *RtnAddrOp);
19401930
19411931 ScoreBrackets.determineWait (SmemAccessCounter, RtnAddrOpInterval,
19421932 Wait);
@@ -2000,7 +1990,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
20001990 if (Op.isTied () && Op.isUse () && TII->doesNotReadTiedSource (MI))
20011991 continue ;
20021992
2003- RegInterval Interval = ScoreBrackets.getRegInterval (&MI, MRI, TRI, Op);
1993+ RegInterval Interval = ScoreBrackets.getRegInterval (&MI, Op);
20041994
20051995 const bool IsVGPR = TRI->isVectorRegister (*MRI, Op.getReg ());
20061996 if (IsVGPR) {
@@ -2237,16 +2227,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22372227 if (TII->isDS (Inst) && TII->usesLGKM_CNT (Inst)) {
22382228 if (TII->isAlwaysGDS (Inst.getOpcode ()) ||
22392229 TII->hasModifiersSet (Inst, AMDGPU::OpName::gds)) {
2240- ScoreBrackets->updateByEvent (TII, TRI, MRI, GDS_ACCESS, Inst);
2241- ScoreBrackets->updateByEvent (TII, TRI, MRI, GDS_GPR_LOCK, Inst);
2230+ ScoreBrackets->updateByEvent (GDS_ACCESS, Inst);
2231+ ScoreBrackets->updateByEvent (GDS_GPR_LOCK, Inst);
22422232 ScoreBrackets->setPendingGDS ();
22432233 } else {
2244- ScoreBrackets->updateByEvent (TII, TRI, MRI, LDS_ACCESS, Inst);
2234+ ScoreBrackets->updateByEvent (LDS_ACCESS, Inst);
22452235 }
22462236 } else if (TII->isFLAT (Inst)) {
22472237 if (SIInstrInfo::isGFX12CacheInvOrWBInst (Inst.getOpcode ())) {
2248- ScoreBrackets->updateByEvent (TII, TRI, MRI, getVmemWaitEventType (Inst),
2249- Inst);
2238+ ScoreBrackets->updateByEvent (getVmemWaitEventType (Inst), Inst);
22502239 return ;
22512240 }
22522241
@@ -2257,13 +2246,12 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22572246 if (TII->mayAccessVMEMThroughFlat (Inst)) {
22582247 ++FlatASCount;
22592248 IsVMEMAccess = true ;
2260- ScoreBrackets->updateByEvent (TII, TRI, MRI, getVmemWaitEventType (Inst),
2261- Inst);
2249+ ScoreBrackets->updateByEvent (getVmemWaitEventType (Inst), Inst);
22622250 }
22632251
22642252 if (TII->mayAccessLDSThroughFlat (Inst)) {
22652253 ++FlatASCount;
2266- ScoreBrackets->updateByEvent (TII, TRI, MRI, LDS_ACCESS, Inst);
2254+ ScoreBrackets->updateByEvent (LDS_ACCESS, Inst);
22672255 }
22682256
22692257 // This is a flat memory operation that access both VMEM and LDS, so note it
@@ -2274,16 +2262,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22742262 } else if (SIInstrInfo::isVMEM (Inst) &&
22752263 !llvm::AMDGPU::getMUBUFIsBufferInv (Inst.getOpcode ())) {
22762264 IsVMEMAccess = true ;
2277- ScoreBrackets->updateByEvent (TII, TRI, MRI, getVmemWaitEventType (Inst),
2278- Inst);
2265+ ScoreBrackets->updateByEvent (getVmemWaitEventType (Inst), Inst);
22792266
22802267 if (ST->vmemWriteNeedsExpWaitcnt () &&
22812268 (Inst.mayStore () || SIInstrInfo::isAtomicRet (Inst))) {
2282- ScoreBrackets->updateByEvent (TII, TRI, MRI, VMW_GPR_LOCK, Inst);
2269+ ScoreBrackets->updateByEvent (VMW_GPR_LOCK, Inst);
22832270 }
22842271 } else if (TII->isSMRD (Inst)) {
22852272 IsSMEMAccess = true ;
2286- ScoreBrackets->updateByEvent (TII, TRI, MRI, SMEM_ACCESS, Inst);
2273+ ScoreBrackets->updateByEvent (SMEM_ACCESS, Inst);
22872274 } else if (Inst.isCall ()) {
22882275 if (callWaitsOnFunctionReturn (Inst)) {
22892276 // Act as a wait on everything
@@ -2295,33 +2282,33 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
22952282 ScoreBrackets->applyWaitcnt (AMDGPU::Waitcnt ());
22962283 }
22972284 } else if (SIInstrInfo::isLDSDIR (Inst)) {
2298- ScoreBrackets->updateByEvent (TII, TRI, MRI, EXP_LDS_ACCESS, Inst);
2285+ ScoreBrackets->updateByEvent (EXP_LDS_ACCESS, Inst);
22992286 } else if (TII->isVINTERP (Inst)) {
23002287 int64_t Imm = TII->getNamedOperand (Inst, AMDGPU::OpName::waitexp)->getImm ();
23012288 ScoreBrackets->applyWaitcnt (EXP_CNT, Imm);
23022289 } else if (SIInstrInfo::isEXP (Inst)) {
23032290 unsigned Imm = TII->getNamedOperand (Inst, AMDGPU::OpName::tgt)->getImm ();
23042291 if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
2305- ScoreBrackets->updateByEvent (TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
2292+ ScoreBrackets->updateByEvent (EXP_PARAM_ACCESS, Inst);
23062293 else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
2307- ScoreBrackets->updateByEvent (TII, TRI, MRI, EXP_POS_ACCESS, Inst);
2294+ ScoreBrackets->updateByEvent (EXP_POS_ACCESS, Inst);
23082295 else
2309- ScoreBrackets->updateByEvent (TII, TRI, MRI, EXP_GPR_LOCK, Inst);
2296+ ScoreBrackets->updateByEvent (EXP_GPR_LOCK, Inst);
23102297 } else if (SIInstrInfo::isSBarrierSCCWrite (Inst.getOpcode ())) {
2311- ScoreBrackets->updateByEvent (TII, TRI, MRI, SCC_WRITE, Inst);
2298+ ScoreBrackets->updateByEvent (SCC_WRITE, Inst);
23122299 } else {
23132300 switch (Inst.getOpcode ()) {
23142301 case AMDGPU::S_SENDMSG:
23152302 case AMDGPU::S_SENDMSG_RTN_B32:
23162303 case AMDGPU::S_SENDMSG_RTN_B64:
23172304 case AMDGPU::S_SENDMSGHALT:
2318- ScoreBrackets->updateByEvent (TII, TRI, MRI, SQ_MESSAGE, Inst);
2305+ ScoreBrackets->updateByEvent (SQ_MESSAGE, Inst);
23192306 break ;
23202307 case AMDGPU::S_MEMTIME:
23212308 case AMDGPU::S_MEMREALTIME:
23222309 case AMDGPU::S_GET_BARRIER_STATE_M0:
23232310 case AMDGPU::S_GET_BARRIER_STATE_IMM:
2324- ScoreBrackets->updateByEvent (TII, TRI, MRI, SMEM_ACCESS, Inst);
2311+ ScoreBrackets->updateByEvent (SMEM_ACCESS, Inst);
23252312 break ;
23262313 }
23272314 }
@@ -2330,10 +2317,10 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
23302317 return ;
23312318
23322319 if (IsVMEMAccess)
2333- ScoreBrackets->updateByEvent (TII, TRI, MRI, VMEM_GROUP, Inst);
2320+ ScoreBrackets->updateByEvent (VMEM_GROUP, Inst);
23342321
23352322 if (IsSMEMAccess)
2336- ScoreBrackets->updateByEvent (TII, TRI, MRI, SMEM_GROUP, Inst);
2323+ ScoreBrackets->updateByEvent (SMEM_GROUP, Inst);
23372324}
23382325
23392326bool WaitcntBrackets::mergeScore (const MergeInfo &M, unsigned &Score,
@@ -2637,7 +2624,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
26372624 for (const MachineOperand &Op : MI.all_uses ()) {
26382625 if (Op.isDebug () || !TRI->isVectorRegister (*MRI, Op.getReg ()))
26392626 continue ;
2640- RegInterval Interval = Brackets.getRegInterval (&MI, MRI, TRI, Op);
2627+ RegInterval Interval = Brackets.getRegInterval (&MI, Op);
26412628 // Vgpr use
26422629 for (int RegNo = Interval.first ; RegNo < Interval.second ; ++RegNo) {
26432630 // If we find a register that is loaded inside the loop, 1. and 2.
@@ -2662,7 +2649,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
26622649 // VMem load vgpr def
26632650 if (isVMEMOrFlatVMEM (MI) && MI.mayLoad ()) {
26642651 for (const MachineOperand &Op : MI.all_defs ()) {
2665- RegInterval Interval = Brackets.getRegInterval (&MI, MRI, TRI, Op);
2652+ RegInterval Interval = Brackets.getRegInterval (&MI, Op);
26662653 for (int RegNo = Interval.first ; RegNo < Interval.second ; ++RegNo) {
26672654 // If we find a register that is loaded inside the loop, 1. and 2.
26682655 // are invalidated and we can exit.
0 commit comments