@@ -37,20 +37,22 @@ STATISTIC(NumSDWAInstructionsPeepholed,
3737
3838namespace {
3939
40+ bool isConvertibleToSDWA (MachineInstr &MI, const GCNSubtarget &ST,
41+ const SIInstrInfo *TII);
4042class SDWAOperand ;
4143class SDWADstOperand ;
4244
43- class SIPeepholeSDWA : public MachineFunctionPass {
44- public:
45- using SDWAOperandsVector = SmallVector<SDWAOperand *, 4 >;
45+ using SDWAOperandsVector = SmallVector<SDWAOperand *, 4 >;
46+ using SDWAOperandsMap = MapVector<MachineInstr *, SDWAOperandsVector>;
4647
48+ class SIPeepholeSDWA : public MachineFunctionPass {
4749private:
4850 MachineRegisterInfo *MRI;
4951 const SIRegisterInfo *TRI;
5052 const SIInstrInfo *TII;
5153
5254 MapVector<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
53- MapVector<MachineInstr *, SDWAOperandsVector> PotentialMatches;
55+ SDWAOperandsMap PotentialMatches;
5456 SmallVector<MachineInstr *, 8 > ConvertedInstructions;
5557
5658 std::optional<int64_t > foldToImm (const MachineOperand &Op) const ;
@@ -65,7 +67,6 @@ class SIPeepholeSDWA : public MachineFunctionPass {
6567 bool runOnMachineFunction (MachineFunction &MF) override ;
6668 void matchSDWAOperands (MachineBasicBlock &MBB);
6769 std::unique_ptr<SDWAOperand> matchSDWAOperand (MachineInstr &MI);
68- bool isConvertibleToSDWA (MachineInstr &MI, const GCNSubtarget &ST) const ;
6970 void pseudoOpConvertToVOP2 (MachineInstr &MI,
7071 const GCNSubtarget &ST) const ;
7172 bool convertToSDWA (MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
@@ -93,7 +94,9 @@ class SDWAOperand {
9394
9495 virtual ~SDWAOperand () = default ;
9596
96- virtual MachineInstr *potentialToConvert (const SIInstrInfo *TII) = 0;
97+ virtual MachineInstr *potentialToConvert (const SIInstrInfo *TII,
98+ const GCNSubtarget &ST,
99+ SDWAOperandsMap *PotentialMatches = nullptr ) = 0;
97100 virtual bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) = 0;
98101
99102 MachineOperand *getTargetOperand () const { return Target; }
@@ -126,7 +129,9 @@ class SDWASrcOperand : public SDWAOperand {
126129 : SDWAOperand(TargetOp, ReplacedOp),
127130 SrcSel (SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
128131
129- MachineInstr *potentialToConvert (const SIInstrInfo *TII) override ;
132+ MachineInstr *potentialToConvert (const SIInstrInfo *TII,
133+ const GCNSubtarget &ST,
134+ SDWAOperandsMap *PotentialMatches = nullptr ) override ;
130135 bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
131136
132137 SdwaSel getSrcSel () const { return SrcSel; }
@@ -153,7 +158,9 @@ class SDWADstOperand : public SDWAOperand {
153158 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
154159 : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
155160
156- MachineInstr *potentialToConvert (const SIInstrInfo *TII) override ;
161+ MachineInstr *potentialToConvert (const SIInstrInfo *TII,
162+ const GCNSubtarget &ST,
163+ SDWAOperandsMap *PotentialMatches = nullptr ) override ;
157164 bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
158165
159166 SdwaSel getDstSel () const { return DstSel; }
@@ -327,7 +334,33 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
327334 return Mods;
328335}
329336
330- MachineInstr *SDWASrcOperand::potentialToConvert (const SIInstrInfo *TII) {
337+ MachineInstr *SDWASrcOperand::potentialToConvert (const SIInstrInfo *TII,
338+ const GCNSubtarget &ST,
339+ SDWAOperandsMap *PotentialMatches) {
340+ if (PotentialMatches != nullptr ) {
341+ // Record this operand for every use of Reg, but only if all uses can be converted
342+ MachineOperand *Reg = getReplacedOperand ();
343+ if (!Reg->isReg () || !Reg->isDef ())
344+ return nullptr ;
345+
346+ for (MachineInstr &UseMI : getMRI ()->use_nodbg_instructions (Reg->getReg ()))
347+ // Check that all instructions that use Reg can be converted
348+ if (!isConvertibleToSDWA (UseMI, ST, TII))
349+ return nullptr ;
350+
351+ // Now that it's guaranteed all uses are legal, iterate over the uses again
352+ // to add them for later conversion.
353+ for (MachineOperand &UseMO : getMRI ()->use_nodbg_operands (Reg->getReg ())) {
354+ // Should not get a subregister here
355+ assert (isSameReg (UseMO, *Reg));
356+
357+ SDWAOperandsMap &potentialMatchesMap = *PotentialMatches;
358+ MachineInstr *UseMI = UseMO.getParent ();
359+ potentialMatchesMap[UseMI].push_back (this );
360+ }
361+ return nullptr ;
362+ }
363+
331364 // For an SDWA src operand, the potential instruction is one that uses the
332365 // register defined by the parent instruction
333366 MachineOperand *PotentialMO = findSingleRegUse (getReplacedOperand (), getMRI ());
@@ -420,7 +453,9 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
420453 return true ;
421454}
422455
423- MachineInstr *SDWADstOperand::potentialToConvert (const SIInstrInfo *TII) {
456+ MachineInstr *SDWADstOperand::potentialToConvert (const SIInstrInfo *TII,
457+ const GCNSubtarget &ST,
458+ SDWAOperandsMap *PotentialMatches) {
424459 // For an SDWA dst operand, the potential instruction is one that defines the
425460 // register that this operand uses
426461 MachineRegisterInfo *MRI = getMRI ();
@@ -919,8 +954,10 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
919954 MISucc.substituteRegister (CarryIn->getReg (), TRI->getVCC (), 0 , *TRI);
920955}
921956
922- bool SIPeepholeSDWA::isConvertibleToSDWA (MachineInstr &MI,
923- const GCNSubtarget &ST) const {
957+ namespace {
958+ bool isConvertibleToSDWA (MachineInstr &MI,
959+ const GCNSubtarget &ST,
960+ const SIInstrInfo* TII) {
924961 // Check if this is already an SDWA instruction
925962 unsigned Opc = MI.getOpcode ();
926963 if (TII->isSDWA (Opc))
@@ -980,6 +1017,7 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
9801017
9811018 return true ;
9821019}
1020+ } // namespace
9831021
9841022bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
9851023 const SDWAOperandsVector &SDWAOperands) {
@@ -1215,7 +1253,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
12151253 matchSDWAOperands (MBB);
12161254 for (const auto &OperandPair : SDWAOperands) {
12171255 const auto &Operand = OperandPair.second ;
1218- MachineInstr *PotentialMI = Operand->potentialToConvert (TII);
1256+ MachineInstr *PotentialMI = Operand->potentialToConvert (TII, ST );
12191257 if (PotentialMI &&
12201258 (PotentialMI->getOpcode () == AMDGPU::V_ADD_CO_U32_e64 ||
12211259 PotentialMI->getOpcode () == AMDGPU::V_SUB_CO_U32_e64))
@@ -1228,8 +1266,8 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
12281266
12291267 for (const auto &OperandPair : SDWAOperands) {
12301268 const auto &Operand = OperandPair.second ;
1231- MachineInstr *PotentialMI = Operand->potentialToConvert (TII);
1232- if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST)) {
1269+ MachineInstr *PotentialMI = Operand->potentialToConvert (TII, ST, &PotentialMatches );
1270+ if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST, TII )) {
12331271 PotentialMatches[PotentialMI].push_back (Operand.get ());
12341272 }
12351273 }
0 commit comments