@@ -62,6 +62,7 @@ class SIPeepholeSDWA {
6262 std::unique_ptr<SDWAOperand> matchSDWAOperand (MachineInstr &MI);
6363 void pseudoOpConvertToVOP2 (MachineInstr &MI,
6464 const GCNSubtarget &ST) const ;
65+ MachineInstr *createSDWAVersion (MachineInstr &MI);
6566 bool convertToSDWA (MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
6667 void legalizeScalarOperands (MachineInstr &MI, const GCNSubtarget &ST) const ;
6768
@@ -85,11 +86,18 @@ class SIPeepholeSDWALegacy : public MachineFunctionPass {
8586 }
8687};
8788
89+ using namespace AMDGPU ::SDWA;
90+
8891class SDWAOperand {
8992private:
9093 MachineOperand *Target; // Operand that would be used in converted instruction
9194 MachineOperand *Replaced; // Operand that would be replace by Target
9295
96+ // / Returns true iff the SDWA selection of this SDWAOperand can be combined
97+ // / with the SDWA selections of its uses in \p MI.
98+ virtual bool canCombineSelections (const MachineInstr &MI,
99+ const SIInstrInfo *TII) = 0;
100+
93101public:
94102 SDWAOperand (MachineOperand *TargetOp, MachineOperand *ReplacedOp)
95103 : Target(TargetOp), Replaced(ReplacedOp) {
@@ -118,8 +126,6 @@ class SDWAOperand {
118126#endif
119127};
120128
121- using namespace AMDGPU ::SDWA;
122-
123129class SDWASrcOperand : public SDWAOperand {
124130private:
125131 SdwaSel SrcSel;
@@ -131,13 +137,15 @@ class SDWASrcOperand : public SDWAOperand {
131137 SDWASrcOperand (MachineOperand *TargetOp, MachineOperand *ReplacedOp,
132138 SdwaSel SrcSel_ = DWORD, bool Abs_ = false , bool Neg_ = false ,
133139 bool Sext_ = false )
134- : SDWAOperand(TargetOp, ReplacedOp),
135- SrcSel (SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
140+ : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
141+ Neg (Neg_), Sext(Sext_) {}
136142
137143 MachineInstr *potentialToConvert (const SIInstrInfo *TII,
138144 const GCNSubtarget &ST,
139145 SDWAOperandsMap *PotentialMatches = nullptr ) override ;
140146 bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
147+ bool canCombineSelections (const MachineInstr &MI,
148+ const SIInstrInfo *TII) override ;
141149
142150 SdwaSel getSrcSel () const { return SrcSel; }
143151 bool getAbs () const { return Abs; }
@@ -158,15 +166,16 @@ class SDWADstOperand : public SDWAOperand {
158166 DstUnused DstUn;
159167
160168public:
161-
162169 SDWADstOperand (MachineOperand *TargetOp, MachineOperand *ReplacedOp,
163170 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
164- : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
171+ : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
165172
166173 MachineInstr *potentialToConvert (const SIInstrInfo *TII,
167174 const GCNSubtarget &ST,
168175 SDWAOperandsMap *PotentialMatches = nullptr ) override ;
169176 bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
177+ bool canCombineSelections (const MachineInstr &MI,
178+ const SIInstrInfo *TII) override ;
170179
171180 SdwaSel getDstSel () const { return DstSel; }
172181 DstUnused getDstUnused () const { return DstUn; }
@@ -187,6 +196,8 @@ class SDWADstPreserveOperand : public SDWADstOperand {
187196 Preserve (PreserveOp) {}
188197
189198 bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
199+ bool canCombineSelections (const MachineInstr &MI,
200+ const SIInstrInfo *TII) override ;
190201
191202 MachineOperand *getPreservedOperand () const { return Preserve; }
192203
@@ -314,6 +325,38 @@ static MachineOperand *findSingleRegDef(const MachineOperand *Reg,
314325 return nullptr ;
315326}
316327
328+ // / Combine an SDWA instruction's existing SDWA selection \p Sel with
329+ // / the SDWA selection \p OperandSel of its operand. If the selections
330+ // / are compatible, return the combined selection, otherwise return a
331+ // / nullopt.
332+ // / For example, if we have Sel = BYTE_0 Sel and OperandSel = WORD_1:
333+ // / BYTE_0 Sel (WORD_1 Sel (%X)) -> BYTE_2 Sel (%X)
334+ static std::optional<SdwaSel> combineSdwaSel (SdwaSel Sel, SdwaSel OperandSel) {
335+ if (Sel == SdwaSel::DWORD)
336+ return OperandSel;
337+
338+ if (Sel == OperandSel || OperandSel == SdwaSel::DWORD)
339+ return Sel;
340+
341+ if (Sel == SdwaSel::WORD_1 || Sel == SdwaSel::BYTE_2 ||
342+ Sel == SdwaSel::BYTE_3)
343+ return {};
344+
345+ if (OperandSel == SdwaSel::WORD_0)
346+ return Sel;
347+
348+ if (OperandSel == SdwaSel::WORD_1) {
349+ if (Sel == SdwaSel::BYTE_0)
350+ return SdwaSel::BYTE_2;
351+ if (Sel == SdwaSel::BYTE_1)
352+ return SdwaSel::BYTE_3;
353+ if (Sel == SdwaSel::WORD_0)
354+ return SdwaSel::WORD_1;
355+ }
356+
357+ return {};
358+ }
359+
317360uint64_t SDWASrcOperand::getSrcMods (const SIInstrInfo *TII,
318361 const MachineOperand *SrcOp) const {
319362 uint64_t Mods = 0 ;
@@ -350,7 +393,8 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
350393
351394 for (MachineInstr &UseMI : getMRI ()->use_nodbg_instructions (Reg->getReg ()))
352395 // Check that all instructions that use Reg can be converted
353- if (!isConvertibleToSDWA (UseMI, ST, TII))
396+ if (!isConvertibleToSDWA (UseMI, ST, TII) ||
397+ !canCombineSelections (UseMI, TII))
354398 return nullptr ;
355399
356400 // Now that it's guaranteed all uses are legal, iterate over the uses again
@@ -372,7 +416,9 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
372416 if (!PotentialMO)
373417 return nullptr ;
374418
375- return PotentialMO->getParent ();
419+ MachineInstr *Parent = PotentialMO->getParent ();
420+
421+ return canCombineSelections (*Parent, TII) ? Parent : nullptr ;
376422}
377423
378424bool SDWASrcOperand::convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) {
@@ -451,13 +497,55 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
451497 }
452498 copyRegOperand (*Src, *getTargetOperand ());
453499 if (!IsPreserveSrc) {
454- SrcSel->setImm (getSrcSel ());
500+ SdwaSel ExistingSel = static_cast <SdwaSel>(SrcSel->getImm ());
501+ SrcSel->setImm (*combineSdwaSel (ExistingSel, getSrcSel ()));
455502 SrcMods->setImm (getSrcMods (TII, Src));
456503 }
457504 getTargetOperand ()->setIsKill (false );
458505 return true ;
459506}
460507
508+ // / Verify that the SDWA selection operand \p SrcSelOpName of the SDWA
509+ // / instruction \p MI can be combined with the selection \p OpSel.
510+ static bool canCombineOpSel (const MachineInstr &MI, const SIInstrInfo *TII,
511+ AMDGPU::OpName SrcSelOpName, SdwaSel OpSel) {
512+ assert (TII->isSDWA (MI.getOpcode ()));
513+
514+ const MachineOperand *SrcSelOp = TII->getNamedOperand (MI, SrcSelOpName);
515+ SdwaSel SrcSel = static_cast <SdwaSel>(SrcSelOp->getImm ());
516+
517+ return combineSdwaSel (SrcSel, OpSel).has_value ();
518+ }
519+
520+ // / Verify that \p Op is the same register as the operand of the SDWA
521+ // / instruction \p MI named by \p SrcOpName and that the SDWA
522+ // / selection \p SrcSelOpName can be combined with the \p OpSel.
523+ static bool canCombineOpSel (const MachineInstr &MI, const SIInstrInfo *TII,
524+ AMDGPU::OpName SrcOpName,
525+ AMDGPU::OpName SrcSelOpName, MachineOperand *Op,
526+ SdwaSel OpSel) {
527+ assert (TII->isSDWA (MI.getOpcode ()));
528+
529+ const MachineOperand *Src = TII->getNamedOperand (MI, SrcOpName);
530+ if (!Src || !isSameReg (*Src, *Op))
531+ return true ;
532+
533+ return canCombineOpSel (MI, TII, SrcSelOpName, OpSel);
534+ }
535+
536+ bool SDWASrcOperand::canCombineSelections (const MachineInstr &MI,
537+ const SIInstrInfo *TII) {
538+ if (!TII->isSDWA (MI.getOpcode ()))
539+ return true ;
540+
541+ using namespace AMDGPU ;
542+
543+ return canCombineOpSel (MI, TII, OpName::src0, OpName::src0_sel,
544+ getReplacedOperand (), getSrcSel ()) &&
545+ canCombineOpSel (MI, TII, OpName::src1, OpName::src1_sel,
546+ getReplacedOperand (), getSrcSel ());
547+ }
548+
461549MachineInstr *SDWADstOperand::potentialToConvert (const SIInstrInfo *TII,
462550 const GCNSubtarget &ST,
463551 SDWAOperandsMap *PotentialMatches) {
@@ -476,7 +564,8 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
476564 return nullptr ;
477565 }
478566
479- return PotentialMO->getParent ();
567+ MachineInstr *Parent = PotentialMO->getParent ();
568+ return canCombineSelections (*Parent, TII) ? Parent : nullptr ;
480569}
481570
482571bool SDWADstOperand::convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) {
@@ -498,7 +587,10 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
498587 copyRegOperand (*Operand, *getTargetOperand ());
499588 MachineOperand *DstSel= TII->getNamedOperand (MI, AMDGPU::OpName::dst_sel);
500589 assert (DstSel);
501- DstSel->setImm (getDstSel ());
590+
591+ SdwaSel ExistingSel = static_cast <SdwaSel>(DstSel->getImm ());
592+ DstSel->setImm (combineSdwaSel (ExistingSel, getDstSel ()).value ());
593+
502594 MachineOperand *DstUnused= TII->getNamedOperand (MI, AMDGPU::OpName::dst_unused);
503595 assert (DstUnused);
504596 DstUnused->setImm (getDstUnused ());
@@ -509,6 +601,14 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
509601 return true ;
510602}
511603
604+ bool SDWADstOperand::canCombineSelections (const MachineInstr &MI,
605+ const SIInstrInfo *TII) {
606+ if (!TII->isSDWA (MI.getOpcode ()))
607+ return true ;
608+
609+ return canCombineOpSel (MI, TII, AMDGPU::OpName::dst_sel, getDstSel ());
610+ }
611+
512612bool SDWADstPreserveOperand::convertToSDWA (MachineInstr &MI,
513613 const SIInstrInfo *TII) {
514614 // MI should be moved right before v_or_b32.
@@ -538,6 +638,11 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
538638 return SDWADstOperand::convertToSDWA (MI, TII);
539639}
540640
641+ bool SDWADstPreserveOperand::canCombineSelections (const MachineInstr &MI,
642+ const SIInstrInfo *TII) {
643+ return SDWADstOperand::canCombineSelections (MI, TII);
644+ }
645+
541646std::optional<int64_t >
542647SIPeepholeSDWA::foldToImm (const MachineOperand &Op) const {
543648 if (Op.isImm ()) {
@@ -962,11 +1067,8 @@ bool isConvertibleToSDWA(MachineInstr &MI,
9621067 const SIInstrInfo* TII) {
9631068 // Check if this is already an SDWA instruction
9641069 unsigned Opc = MI.getOpcode ();
965- if (TII->isSDWA (Opc)) {
966- // FIXME: Reenable after fixing selection handling.
967- // Cf. llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll
968- return false ;
969- }
1070+ if (TII->isSDWA (Opc))
1071+ return true ;
9701072
9711073 // Check if this instruction has opcode that supports SDWA
9721074 if (AMDGPU::getSDWAOp (Opc) == -1 )
@@ -1024,21 +1126,13 @@ bool isConvertibleToSDWA(MachineInstr &MI,
10241126}
10251127} // namespace
10261128
1027- bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
1028- const SDWAOperandsVector &SDWAOperands) {
1029-
1030- LLVM_DEBUG (dbgs () << " Convert instruction:" << MI);
1031-
1032- // Convert to sdwa
1033- int SDWAOpcode;
1129+ MachineInstr *SIPeepholeSDWA::createSDWAVersion (MachineInstr &MI) {
10341130 unsigned Opcode = MI.getOpcode ();
1035- if (TII->isSDWA (Opcode)) {
1036- SDWAOpcode = Opcode;
1037- } else {
1038- SDWAOpcode = AMDGPU::getSDWAOp (Opcode);
1039- if (SDWAOpcode == -1 )
1040- SDWAOpcode = AMDGPU::getSDWAOp (AMDGPU::getVOPe32 (Opcode));
1041- }
1131+ assert (!TII->isSDWA (Opcode));
1132+
1133+ int SDWAOpcode = AMDGPU::getSDWAOp (Opcode);
1134+ if (SDWAOpcode == -1 )
1135+ SDWAOpcode = AMDGPU::getSDWAOp (AMDGPU::getVOPe32 (Opcode));
10421136 assert (SDWAOpcode != -1 );
10431137
10441138 const MCInstrDesc &SDWADesc = TII->get (SDWAOpcode);
@@ -1172,6 +1266,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
11721266 SDWAInst->tieOperands (PreserveDstIdx, SDWAInst->getNumOperands () - 1 );
11731267 }
11741268
1269+ return SDWAInst.getInstr ();
1270+ }
1271+
1272+ bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
1273+ const SDWAOperandsVector &SDWAOperands) {
1274+ LLVM_DEBUG (dbgs () << " Convert instruction:" << MI);
1275+
1276+ MachineInstr *SDWAInst;
1277+ if (TII->isSDWA (MI.getOpcode ())) {
1278+ // Clone the instruction to allow revoking changes
1279+ // made to MI during the processing of the operands
1280+ // if the conversion fails.
1281+ SDWAInst = MI.getParent ()->getParent ()->CloneMachineInstr (&MI);
1282+ MI.getParent ()->insert (MI.getIterator (), SDWAInst);
1283+ } else {
1284+ SDWAInst = createSDWAVersion (MI);
1285+ }
1286+
11751287 // Apply all sdwa operand patterns.
11761288 bool Converted = false ;
11771289 for (auto &Operand : SDWAOperands) {
@@ -1190,19 +1302,18 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
11901302 Converted |= Operand->convertToSDWA (*SDWAInst, TII);
11911303 }
11921304
1193- if (Converted) {
1194- ConvertedInstructions.push_back (SDWAInst);
1195- for (MachineOperand &MO : SDWAInst->uses ()) {
1196- if (!MO.isReg ())
1197- continue ;
1198-
1199- MRI->clearKillFlags (MO.getReg ());
1200- }
1201- } else {
1305+ if (!Converted) {
12021306 SDWAInst->eraseFromParent ();
12031307 return false ;
12041308 }
12051309
1310+ ConvertedInstructions.push_back (SDWAInst);
1311+ for (MachineOperand &MO : SDWAInst->uses ()) {
1312+ if (!MO.isReg ())
1313+ continue ;
1314+
1315+ MRI->clearKillFlags (MO.getReg ());
1316+ }
12061317 LLVM_DEBUG (dbgs () << " \n Into:" << *SDWAInst << ' \n ' );
12071318 ++NumSDWAInstructionsPeepholed;
12081319
@@ -1285,10 +1396,11 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) {
12851396
12861397 for (const auto &OperandPair : SDWAOperands) {
12871398 const auto &Operand = OperandPair.second ;
1288- MachineInstr *PotentialMI = Operand->potentialToConvert (TII, ST, &PotentialMatches);
1289- if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST, TII)) {
1399+ MachineInstr *PotentialMI =
1400+ Operand->potentialToConvert (TII, ST, &PotentialMatches);
1401+
1402+ if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST, TII))
12901403 PotentialMatches[PotentialMI].push_back (Operand.get ());
1291- }
12921404 }
12931405
12941406 for (auto &PotentialPair : PotentialMatches) {
0 commit comments