@@ -304,7 +304,8 @@ class WaitcntBrackets {
 
   RegInterval getRegInterval(const MachineInstr *MI,
                              const MachineRegisterInfo *MRI,
-                             const SIRegisterInfo *TRI, unsigned OpNo) const;
+                             const SIRegisterInfo *TRI,
+                             const MachineOperand &Op) const;
 
   bool counterOutOfOrder(InstCounterType T) const;
   void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
@@ -405,9 +406,9 @@ class WaitcntBrackets {
     }
   }
 
-  void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
-                   const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
-                   unsigned OpNo, unsigned Val);
+  void setExpScore(const MachineInstr *MI, const SIRegisterInfo *TRI,
+                   const MachineRegisterInfo *MRI, const MachineOperand &Op,
+                   unsigned Val);
 
   const GCNSubtarget *ST = nullptr;
   InstCounterType MaxCounter = NUM_EXTENDED_INST_CNTS;
@@ -734,8 +735,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
 RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
                                             const MachineRegisterInfo *MRI,
                                             const SIRegisterInfo *TRI,
-                                            unsigned OpNo) const {
-  const MachineOperand &Op = MI->getOperand(OpNo);
+                                            const MachineOperand &Op) const {
   if (!TRI->isInAllocatableClass(Op.getReg()))
     return {-1, -1};
 
@@ -773,12 +773,11 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
 }
 
 void WaitcntBrackets::setExpScore(const MachineInstr *MI,
-                                  const SIInstrInfo *TII,
                                   const SIRegisterInfo *TRI,
-                                  const MachineRegisterInfo *MRI, unsigned OpNo,
-                                  unsigned Val) {
-  RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo);
-  assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg()));
+                                  const MachineRegisterInfo *MRI,
+                                  const MachineOperand &Op, unsigned Val) {
+  RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
+  assert(TRI->isVectorRegister(*MRI, Op.getReg()));
   for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
     setRegScore(RegNo, EXP_CNT, Val);
   }
@@ -804,79 +803,60 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     // Put score on the source vgprs. If this is a store, just use those
     // specific register(s).
     if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
-      int AddrOpIdx =
-          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
       // All GDS operations must protect their address register (same as
       // export.)
-      if (AddrOpIdx != -1) {
-        setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
-      }
+      if (const auto *AddrOp = TII->getNamedOperand(Inst, AMDGPU::OpName::addr))
+        setExpScore(&Inst, TRI, MRI, *AddrOp, CurrScore);
 
       if (Inst.mayStore()) {
-        if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data0)) {
-          setExpScore(
-              &Inst, TII, TRI, MRI,
-              AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
-              CurrScore);
-        }
-        if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data1)) {
-          setExpScore(&Inst, TII, TRI, MRI,
-                      AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
-                                                 AMDGPU::OpName::data1),
-                      CurrScore);
-        }
+        if (const auto *Data0 =
+                TII->getNamedOperand(Inst, AMDGPU::OpName::data0))
+          setExpScore(&Inst, TRI, MRI, *Data0, CurrScore);
+        if (const auto *Data1 =
+                TII->getNamedOperand(Inst, AMDGPU::OpName::data1))
+          setExpScore(&Inst, TRI, MRI, *Data1, CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
                  Inst.getOpcode() != AMDGPU::DS_APPEND &&
                  Inst.getOpcode() != AMDGPU::DS_CONSUME &&
                  Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
-        for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
-          const MachineOperand &Op = Inst.getOperand(I);
-          if (Op.isReg() && !Op.isDef() &&
-              TRI->isVectorRegister(*MRI, Op.getReg())) {
-            setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
-          }
+        for (const MachineOperand &Op : Inst.all_uses()) {
+          if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+            setExpScore(&Inst, TRI, MRI, Op, CurrScore);
         }
       }
     } else if (TII->isFLAT(Inst)) {
       if (Inst.mayStore()) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       }
     } else if (TII->isMIMG(Inst)) {
       if (Inst.mayStore()) {
-        setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+        setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       }
     } else if (TII->isMTBUF(Inst)) {
-      if (Inst.mayStore()) {
-        setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
-      }
+      if (Inst.mayStore())
+        setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
     } else if (TII->isMUBUF(Inst)) {
       if (Inst.mayStore()) {
-        setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+        setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
-            CurrScore);
+        setExpScore(&Inst, TRI, MRI,
+                    *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+                    CurrScore);
       }
     } else if (TII->isLDSDIR(Inst)) {
       // LDSDIR instructions attach the score to the destination.
-      setExpScore(
-          &Inst, TII, TRI, MRI,
-          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdst),
-          CurrScore);
+      setExpScore(&Inst, TRI, MRI,
+                  *TII->getNamedOperand(Inst, AMDGPU::OpName::vdst), CurrScore);
     } else {
       if (TII->isEXP(Inst)) {
         // For export the destination registers are really temps that
@@ -891,12 +871,9 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
           }
         }
       }
-      for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
-        MachineOperand &MO = Inst.getOperand(I);
-        if (MO.isReg() && !MO.isDef() &&
-            TRI->isVectorRegister(*MRI, MO.getReg())) {
-          setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
-        }
+      for (const MachineOperand &Op : Inst.all_uses()) {
+        if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+          setExpScore(&Inst, TRI, MRI, Op, CurrScore);
       }
     }
   } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
@@ -907,14 +884,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     // artificial dependency, while these are there only for register liveness
     // accounting purposes.
     //
-    // Special cases where implicit register defs and uses exists, such as
-    // M0, FLAT_SCR or VCC, but the wait will be generated earlier in the
-    // generateWaitcntInstBefore() if that was loaded from memory.
-    for (unsigned I = 0, E = Inst.getNumExplicitOperands(); I != E; ++I) {
-      auto &Op = Inst.getOperand(I);
-      if (!Op.isReg() || !Op.isDef())
-        continue;
-      RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I);
+    // Special cases where implicit register defs exists, such as M0 or VCC,
+    // but none with memory instructions.
+    for (const MachineOperand &Op : Inst.defs()) {
+      RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op);
       if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
         if (Interval.first >= NUM_ALL_VGPRS)
           continue;
@@ -1692,22 +1665,19 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       // load). We also need to check WAW dependency with saved PC.
       Wait = AMDGPU::Waitcnt();
 
-      int CallAddrOpIdx =
-          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
-
-      if (MI.getOperand(CallAddrOpIdx).isReg()) {
+      const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+      if (CallAddrOp.isReg()) {
         RegInterval CallAddrOpInterval =
-            ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx);
+            ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOp);
 
         for (int RegNo = CallAddrOpInterval.first;
              RegNo < CallAddrOpInterval.second; ++RegNo)
           ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
 
-        int RtnAddrOpIdx =
-            AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
-        if (RtnAddrOpIdx != -1) {
+        if (const auto *RtnAddrOp =
+                TII->getNamedOperand(MI, AMDGPU::OpName::dst)) {
           RegInterval RtnAddrOpInterval =
-              ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx);
+              ScoreBrackets.getRegInterval(&MI, MRI, TRI, *RtnAddrOp);
 
           for (int RegNo = RtnAddrOpInterval.first;
                RegNo < RtnAddrOpInterval.second; ++RegNo)
@@ -1769,16 +1739,15 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       }
 
       // Loop over use and def operands.
-      for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-        MachineOperand &Op = MI.getOperand(I);
+      for (const MachineOperand &Op : MI.operands()) {
         if (!Op.isReg())
           continue;
 
         // If the instruction does not read tied source, skip the operand.
         if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
           continue;
 
-        RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I);
+        RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, Op);
 
         const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
         for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
@@ -2357,41 +2326,43 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
         if (MI.mayStore())
           HasVMemStore = true;
       }
-      for (unsigned I = 0; I < MI.getNumOperands(); I++) {
-        MachineOperand &Op = MI.getOperand(I);
+      for (const MachineOperand &Op : MI.all_uses()) {
         if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
           continue;
-        RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I);
+        RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
         // Vgpr use
-        if (Op.isUse()) {
-          for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
-            // If we find a register that is loaded inside the loop, 1. and 2.
-            // are invalidated and we can exit.
-            if (VgprDef.contains(RegNo))
-              return false;
-            VgprUse.insert(RegNo);
-            // If at least one of Op's registers is in the score brackets, the
-            // value is likely loaded outside of the loop.
-            if (Brackets.getRegScore(RegNo, LOAD_CNT) >
-                    Brackets.getScoreLB(LOAD_CNT) ||
-                Brackets.getRegScore(RegNo, SAMPLE_CNT) >
-                    Brackets.getScoreLB(SAMPLE_CNT) ||
-                Brackets.getRegScore(RegNo, BVH_CNT) >
-                    Brackets.getScoreLB(BVH_CNT)) {
-              UsesVgprLoadedOutside = true;
-              break;
-            }
+        for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+          // If we find a register that is loaded inside the loop, 1. and 2.
+          // are invalidated and we can exit.
+          if (VgprDef.contains(RegNo))
+            return false;
+          VgprUse.insert(RegNo);
+          // If at least one of Op's registers is in the score brackets, the
+          // value is likely loaded outside of the loop.
+          if (Brackets.getRegScore(RegNo, LOAD_CNT) >
+                  Brackets.getScoreLB(LOAD_CNT) ||
+              Brackets.getRegScore(RegNo, SAMPLE_CNT) >
+                  Brackets.getScoreLB(SAMPLE_CNT) ||
+              Brackets.getRegScore(RegNo, BVH_CNT) >
+                  Brackets.getScoreLB(BVH_CNT)) {
+            UsesVgprLoadedOutside = true;
+            break;
           }
         }
-        // VMem load vgpr def
-        else if (isVMEMOrFlatVMEM(MI) && MI.mayLoad() && Op.isDef())
+      }
+
+      // VMem load vgpr def
+      if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
+        for (const MachineOperand &Op : MI.all_defs()) {
+          RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
           for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
             // If we find a register that is loaded inside the loop, 1. and 2.
             // are invalidated and we can exit.
            if (VgprUse.contains(RegNo))
               return false;
             VgprDef.insert(RegNo);
           }
+        }
       }
     }
   }
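
The core interface change in this patch is that getRegInterval and setExpScore now take a const MachineOperand & instead of an operand index, so call sites can feed the result of TII->getNamedOperand() straight in and getRegInterval no longer re-fetches the operand via MI->getOperand(OpNo). A minimal standalone sketch of that shape follows; the Operand and Instr types and the getNamedOperand helper below are simplified, hypothetical stand-ins for illustration only, not the real MachineOperand/MachineInstr/SIInstrInfo APIs.

// Standalone sketch with stand-in types (NOT the real LLVM classes): it only
// contrasts the old index-based interface with the new operand-based one.
#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

struct Operand {                       // stand-in for MachineOperand
  int Reg = -1;
  int getReg() const { return Reg; }
};

struct Instr {                         // stand-in for MachineInstr
  std::vector<Operand> Ops;
  const Operand &getOperand(unsigned OpNo) const { return Ops[OpNo]; }
  // Stand-in for SIInstrInfo::getNamedOperand: nullptr when the named operand
  // is absent, so callers can test for presence and dereference in one step.
  const Operand *getNamedOperand(unsigned OpNo) const {
    return OpNo < Ops.size() ? &Ops[OpNo] : nullptr;
  }
};

using RegInterval = std::pair<int, int>;

// Old shape: take an index and re-fetch the operand inside the callee.
RegInterval getRegIntervalByIdx(const Instr &MI, unsigned OpNo) {
  const Operand &Op = MI.getOperand(OpNo);
  return {Op.getReg(), Op.getReg() + 1};
}

// New shape: the caller already holds the operand, so pass it directly.
RegInterval getRegInterval(const Instr & /*MI*/, const Operand &Op) {
  return {Op.getReg(), Op.getReg() + 1};
}

int main() {
  Instr MI{{Operand{4}, Operand{7}}};
  // Old style: carry an index around and look the operand up twice.
  RegInterval A = getRegIntervalByIdx(MI, 1);
  // New style: a named-operand query feeds the call directly.
  if (const Operand *Data = MI.getNamedOperand(1))
    assert(getRegInterval(MI, *Data) == A);
  std::printf("interval [%d, %d)\n", A.first, A.second);
  return 0;
}

Passing the operand also lets the assert in setExpScore check Op.getReg() directly and drops the now-unused SIInstrInfo parameter, which is why every call site in the hunks above shrinks.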
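The other recurring change is replacing manual getNumOperands()/getOperand(I) index loops with range-based iteration over MI.operands(), MI.all_uses(), MI.all_defs(), and Inst.defs(), which folds the isDef()/isUse() filtering into the range itself. A toy version of that filtering is sketched below under the same caveat: the types are simplified stand-ins, and LLVM's all_uses()/all_defs() return lazy filtered ranges rather than copies.

// Standalone sketch with stand-in types (NOT the real LLVM classes): shows the
// iteration pattern only; real all_uses()/all_defs() are lazy iterator ranges.
#include <cstdio>
#include <vector>

struct Operand {
  int Reg;
  bool IsDef;
  bool isDef() const { return IsDef; }
  bool isUse() const { return !IsDef; }
  int getReg() const { return Reg; }
};

struct Instr {
  std::vector<Operand> Ops;
  // Toy equivalents of MachineInstr::all_uses()/all_defs(): copy the matching
  // operands so the caller can use a plain range-based for loop.
  std::vector<Operand> all_uses() const {
    std::vector<Operand> R;
    for (const Operand &Op : Ops)
      if (Op.isUse())
        R.push_back(Op);
    return R;
  }
  std::vector<Operand> all_defs() const {
    std::vector<Operand> R;
    for (const Operand &Op : Ops)
      if (Op.isDef())
        R.push_back(Op);
    return R;
  }
};

int main() {
  Instr MI{{{1, true}, {2, false}, {3, false}}};
  // The loop bodies receive each matching operand directly, with no index
  // bookkeeping and no inline isDef()/isUse() guards.
  for (const Operand &Op : MI.all_uses())
    std::printf("use of reg %d\n", Op.getReg());
  for (const Operand &Op : MI.all_defs())
    std::printf("def of reg %d\n", Op.getReg());
  return 0;
}

Used this way, the loop body mirrors the new code in the patch: it gets each qualifying operand by reference and hands it to getRegInterval or setExpScore without tracking an operand index.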