@@ -1372,8 +1372,8 @@ bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
13721372 MachineInstrBuilder SelectedMI;
13731373 MachineOperand &LHS = I.getOperand (2 );
13741374 MachineOperand &RHS = I.getOperand (3 );
1375- auto [Src0, Src0Mods] = selectVOP3ModsImpl (LHS);
1376- auto [Src1, Src1Mods] = selectVOP3ModsImpl (RHS);
1375+ auto [Src0, Src0Mods] = selectVOP3ModsImpl (LHS. getReg () );
1376+ auto [Src1, Src1Mods] = selectVOP3ModsImpl (RHS. getReg () );
13771377 Register Src0Reg =
13781378 copyToVGPRIfSrcFolded (Src0, Src0Mods, LHS, &I, /* ForceVGPR*/ true );
13791379 Register Src1Reg =
@@ -2467,14 +2467,48 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
24672467 return false ;
24682468}
24692469
2470+ static Register stripCopy (Register Reg, MachineRegisterInfo &MRI) {
2471+ return getDefSrcRegIgnoringCopies (Reg, MRI)->Reg ;
2472+ }
2473+
2474+ static Register stripBitCast (Register Reg, MachineRegisterInfo &MRI) {
2475+ Register BitcastSrc;
2476+ if (mi_match (Reg, MRI, m_GBitcast (m_Reg (BitcastSrc))))
2477+ Reg = BitcastSrc;
2478+ return Reg;
2479+ }
2480+
24702481static bool isExtractHiElt (MachineRegisterInfo &MRI, Register In,
24712482 Register &Out) {
2483+ Register Trunc;
2484+ if (!mi_match (In, MRI, m_GTrunc (m_Reg (Trunc))))
2485+ return false ;
2486+
24722487 Register LShlSrc;
2473- if (mi_match (In, MRI,
2474- m_GTrunc (m_GLShr (m_Reg (LShlSrc), m_SpecificICst (16 ))))) {
2475- Out = LShlSrc;
2488+ Register Cst;
2489+ if (mi_match (Trunc, MRI, m_GLShr (m_Reg (LShlSrc), m_Reg (Cst)))) {
2490+ Cst = stripCopy (Cst, MRI);
2491+ if (mi_match (Cst, MRI, m_SpecificICst (16 ))) {
2492+ Out = stripBitCast (LShlSrc, MRI);
2493+ return true ;
2494+ }
2495+ }
2496+
2497+ MachineInstr *Shuffle = MRI.getVRegDef (Trunc);
2498+ if (Shuffle->getOpcode () != AMDGPU::G_SHUFFLE_VECTOR)
2499+ return false ;
2500+
2501+ assert (MRI.getType (Shuffle->getOperand (0 ).getReg ()) ==
2502+ LLT::fixed_vector (2 , 16 ));
2503+
2504+ ArrayRef<int > Mask = Shuffle->getOperand (3 ).getShuffleMask ();
2505+ assert (Mask.size () == 2 );
2506+
2507+ if (Mask[0 ] == 1 && Mask[1 ] <= 1 ) {
2508+ Out = Shuffle->getOperand (0 ).getReg ();
24762509 return true ;
24772510 }
2511+
24782512 return false ;
24792513}
24802514
@@ -3550,11 +3584,8 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
35503584
35513585}
35523586
3553- std::pair<Register, unsigned >
3554- AMDGPUInstructionSelector::selectVOP3ModsImpl (MachineOperand &Root,
3555- bool IsCanonicalizing,
3556- bool AllowAbs, bool OpSel) const {
3557- Register Src = Root.getReg ();
3587+ std::pair<Register, unsigned > AMDGPUInstructionSelector::selectVOP3ModsImpl (
3588+ Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
35583589 unsigned Mods = 0 ;
35593590 MachineInstr *MI = getDefIgnoringCopies (Src, *MRI);
35603591
@@ -3617,7 +3648,7 @@ InstructionSelector::ComplexRendererFns
36173648AMDGPUInstructionSelector::selectVOP3Mods0 (MachineOperand &Root) const {
36183649 Register Src;
36193650 unsigned Mods;
3620- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
3651+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
36213652
36223653 return {{
36233654 [=](MachineInstrBuilder &MIB) {
@@ -3633,7 +3664,7 @@ InstructionSelector::ComplexRendererFns
36333664AMDGPUInstructionSelector::selectVOP3BMods0 (MachineOperand &Root) const {
36343665 Register Src;
36353666 unsigned Mods;
3636- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
3667+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
36373668 /* IsCanonicalizing=*/ true ,
36383669 /* AllowAbs=*/ false );
36393670
@@ -3660,7 +3691,7 @@ InstructionSelector::ComplexRendererFns
36603691AMDGPUInstructionSelector::selectVOP3Mods (MachineOperand &Root) const {
36613692 Register Src;
36623693 unsigned Mods;
3663- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
3694+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
36643695
36653696 return {{
36663697 [=](MachineInstrBuilder &MIB) {
@@ -3675,7 +3706,8 @@ AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
36753706 MachineOperand &Root) const {
36763707 Register Src;
36773708 unsigned Mods;
3678- std::tie (Src, Mods) = selectVOP3ModsImpl (Root, /* IsCanonicalizing=*/ false );
3709+ std::tie (Src, Mods) =
3710+ selectVOP3ModsImpl (Root.getReg (), /* IsCanonicalizing=*/ false );
36793711
36803712 return {{
36813713 [=](MachineInstrBuilder &MIB) {
@@ -3689,8 +3721,9 @@ InstructionSelector::ComplexRendererFns
36893721AMDGPUInstructionSelector::selectVOP3BMods (MachineOperand &Root) const {
36903722 Register Src;
36913723 unsigned Mods;
3692- std::tie (Src, Mods) = selectVOP3ModsImpl (Root, /* IsCanonicalizing=*/ true ,
3693- /* AllowAbs=*/ false );
3724+ std::tie (Src, Mods) =
3725+ selectVOP3ModsImpl (Root.getReg (), /* IsCanonicalizing=*/ true ,
3726+ /* AllowAbs=*/ false );
36943727
36953728 return {{
36963729 [=](MachineInstrBuilder &MIB) {
@@ -4016,7 +4049,7 @@ InstructionSelector::ComplexRendererFns
40164049AMDGPUInstructionSelector::selectVOP3OpSelMods (MachineOperand &Root) const {
40174050 Register Src;
40184051 unsigned Mods;
4019- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
4052+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
40204053
40214054 // FIXME: Handle op_sel
40224055 return {{
@@ -4029,7 +4062,7 @@ InstructionSelector::ComplexRendererFns
40294062AMDGPUInstructionSelector::selectVINTERPMods (MachineOperand &Root) const {
40304063 Register Src;
40314064 unsigned Mods;
4032- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
4065+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
40334066 /* IsCanonicalizing=*/ true ,
40344067 /* AllowAbs=*/ false ,
40354068 /* OpSel=*/ false );
@@ -4047,7 +4080,7 @@ InstructionSelector::ComplexRendererFns
40474080AMDGPUInstructionSelector::selectVINTERPModsHi (MachineOperand &Root) const {
40484081 Register Src;
40494082 unsigned Mods;
4050- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
4083+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
40514084 /* IsCanonicalizing=*/ true ,
40524085 /* AllowAbs=*/ false ,
40534086 /* OpSel=*/ true );
@@ -5229,97 +5262,41 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
52295262 [=](MachineInstrBuilder &MIB) { MIB.addImm (*EncodedOffset); }}};
52305263}
52315264
5232- // Variant of stripBitCast that returns the instruction instead of a
5233- // MachineOperand.
5234- static MachineInstr *stripBitCast (MachineInstr *MI, MachineRegisterInfo &MRI) {
5235- if (MI->getOpcode () == AMDGPU::G_BITCAST)
5236- return getDefIgnoringCopies (MI->getOperand (1 ).getReg (), MRI);
5237- return MI;
5238- }
5239-
5240- // Figure out if this is really an extract of the high 16-bits of a dword,
5241- // returns nullptr if it isn't.
5242- static MachineInstr *isExtractHiElt (MachineInstr *Inst,
5243- MachineRegisterInfo &MRI) {
5244- Inst = stripBitCast (Inst, MRI);
5245-
5246- if (Inst->getOpcode () != AMDGPU::G_TRUNC)
5247- return nullptr ;
5248-
5249- MachineInstr *TruncOp =
5250- getDefIgnoringCopies (Inst->getOperand (1 ).getReg (), MRI);
5251- TruncOp = stripBitCast (TruncOp, MRI);
5252-
5253- // G_LSHR x, (G_CONSTANT i32 16)
5254- if (TruncOp->getOpcode () == AMDGPU::G_LSHR) {
5255- auto SrlAmount = getIConstantVRegValWithLookThrough (
5256- TruncOp->getOperand (2 ).getReg (), MRI);
5257- if (SrlAmount && SrlAmount->Value .getZExtValue () == 16 ) {
5258- MachineInstr *SrlOp =
5259- getDefIgnoringCopies (TruncOp->getOperand (1 ).getReg (), MRI);
5260- return stripBitCast (SrlOp, MRI);
5261- }
5262- }
5263-
5264- // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
5265- // 1, 0 swaps the low/high 16 bits.
5266- // 1, 1 sets the high 16 bits to be the same as the low 16.
5267- // in any case, it selects the high elts.
5268- if (TruncOp->getOpcode () == AMDGPU::G_SHUFFLE_VECTOR) {
5269- assert (MRI.getType (TruncOp->getOperand (0 ).getReg ()) ==
5270- LLT::fixed_vector (2 , 16 ));
5271-
5272- ArrayRef<int > Mask = TruncOp->getOperand (3 ).getShuffleMask ();
5273- assert (Mask.size () == 2 );
5274-
5275- if (Mask[0 ] == 1 && Mask[1 ] <= 1 ) {
5276- MachineInstr *LHS =
5277- getDefIgnoringCopies (TruncOp->getOperand (1 ).getReg (), MRI);
5278- return stripBitCast (LHS, MRI);
5279- }
5280- }
5281-
5282- return nullptr ;
5283- }
5284-
52855265std::pair<Register, unsigned >
52865266AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl (MachineOperand &Root,
52875267 bool &Matched) const {
52885268 Matched = false ;
52895269
52905270 Register Src;
52915271 unsigned Mods;
5292- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
5293-
5294- MachineInstr *MI = getDefIgnoringCopies (Src, *MRI);
5295- if (MI->getOpcode () == AMDGPU::G_FPEXT) {
5296- MachineOperand *MO = &MI->getOperand (1 );
5297- Src = MO->getReg ();
5298- MI = getDefIgnoringCopies (Src, *MRI);
5272+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root.getReg ());
52995273
5274+ if (mi_match (Src, *MRI, m_GFPExt (m_Reg (Src)))) {
53005275 assert (MRI->getType (Src) == LLT::scalar (16 ));
53015276
5302- // See through bitcasts.
5303- // FIXME: Would be nice to use stripBitCast here.
5304- if (MI->getOpcode () == AMDGPU::G_BITCAST) {
5305- MO = &MI->getOperand (1 );
5306- Src = MO->getReg ();
5307- MI = getDefIgnoringCopies (Src, *MRI);
5308- }
5277+ // Only change Src if src modifier could be gained. In such cases new Src
5278+ // could be sgpr but this does not violate constant bus restriction for
5279+ // instruction that is being selected.
5280+ // Note: Src is not changed when there is only a simple sgpr to vgpr copy
5281+ // since this could violate constant bus restriction.
5282+ Register PeekSrc = stripCopy (Src, *MRI);
53095283
53105284 const auto CheckAbsNeg = [&]() {
53115285 // Be careful about folding modifiers if we already have an abs. fneg is
53125286 // applied last, so we don't want to apply an earlier fneg.
53135287 if ((Mods & SISrcMods::ABS) == 0 ) {
53145288 unsigned ModsTmp;
5315- std::tie (Src, ModsTmp) = selectVOP3ModsImpl (*MO);
5316- MI = getDefIgnoringCopies (Src, *MRI);
5289+ std::tie (PeekSrc, ModsTmp) = selectVOP3ModsImpl (PeekSrc);
53175290
5318- if ((ModsTmp & SISrcMods::NEG) != 0 )
5291+ if ((ModsTmp & SISrcMods::NEG) != 0 ) {
53195292 Mods ^= SISrcMods::NEG;
5293+ Src = PeekSrc;
5294+ }
53205295
5321- if ((ModsTmp & SISrcMods::ABS) != 0 )
5296+ if ((ModsTmp & SISrcMods::ABS) != 0 ) {
53225297 Mods |= SISrcMods::ABS;
5298+ Src = PeekSrc;
5299+ }
53235300 }
53245301 };
53255302
@@ -5332,12 +5309,9 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
53325309
53335310 Mods |= SISrcMods::OP_SEL_1;
53345311
5335- if (MachineInstr *ExtractHiEltMI = isExtractHiElt (MI, *MRI)) {
5312+ if (isExtractHiElt (*MRI, PeekSrc, PeekSrc)) {
5313+ Src = PeekSrc;
53365314 Mods |= SISrcMods::OP_SEL_0;
5337- MI = ExtractHiEltMI;
5338- MO = &MI->getOperand (0 );
5339- Src = MO->getReg ();
5340-
53415315 CheckAbsNeg ();
53425316 }
53435317
0 commit comments