@@ -1372,8 +1372,8 @@ bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
13721372 MachineInstrBuilder SelectedMI;
13731373 MachineOperand &LHS = I.getOperand (2 );
13741374 MachineOperand &RHS = I.getOperand (3 );
1375- auto [Src0, Src0Mods] = selectVOP3ModsImpl (LHS);
1376- auto [Src1, Src1Mods] = selectVOP3ModsImpl (RHS);
1375+ auto [Src0, Src0Mods] = selectVOP3ModsImpl (LHS. getReg () );
1376+ auto [Src1, Src1Mods] = selectVOP3ModsImpl (RHS. getReg () );
13771377 Register Src0Reg =
13781378 copyToVGPRIfSrcFolded (Src0, Src0Mods, LHS, &I, /* ForceVGPR*/ true );
13791379 Register Src1Reg =
@@ -2487,14 +2487,48 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
24872487 return false ;
24882488}
24892489
2490+ static Register stripCopy (Register Reg, MachineRegisterInfo &MRI) {
2491+ return getDefSrcRegIgnoringCopies (Reg, MRI)->Reg ;
2492+ }
2493+
2494+ static Register stripBitCast (Register Reg, MachineRegisterInfo &MRI) {
2495+ Register BitcastSrc;
2496+ if (mi_match (Reg, MRI, m_GBitcast (m_Reg (BitcastSrc))))
2497+ Reg = BitcastSrc;
2498+ return Reg;
2499+ }
2500+
24902501static bool isExtractHiElt (MachineRegisterInfo &MRI, Register In,
24912502 Register &Out) {
2503+ Register Trunc;
2504+ if (!mi_match (In, MRI, m_GTrunc (m_Reg (Trunc))))
2505+ return false ;
2506+
24922507 Register LShlSrc;
2493- if (mi_match (In, MRI,
2494- m_GTrunc (m_GLShr (m_Reg (LShlSrc), m_SpecificICst (16 ))))) {
2495- Out = LShlSrc;
2508+ Register Cst;
2509+ if (mi_match (Trunc, MRI, m_GLShr (m_Reg (LShlSrc), m_Reg (Cst)))) {
2510+ Cst = stripCopy (Cst, MRI);
2511+ if (mi_match (Cst, MRI, m_SpecificICst (16 ))) {
2512+ Out = stripBitCast (LShlSrc, MRI);
2513+ return true ;
2514+ }
2515+ }
2516+
2517+ MachineInstr *Shuffle = MRI.getVRegDef (Trunc);
2518+ if (Shuffle->getOpcode () != AMDGPU::G_SHUFFLE_VECTOR)
2519+ return false ;
2520+
2521+ assert (MRI.getType (Shuffle->getOperand (0 ).getReg ()) ==
2522+ LLT::fixed_vector (2 , 16 ));
2523+
2524+ ArrayRef<int > Mask = Shuffle->getOperand (3 ).getShuffleMask ();
2525+ assert (Mask.size () == 2 );
2526+
2527+ if (Mask[0 ] == 1 && Mask[1 ] <= 1 ) {
2528+ Out = Shuffle->getOperand (0 ).getReg ();
24962529 return true ;
24972530 }
2531+
24982532 return false ;
24992533}
25002534
@@ -3654,11 +3688,8 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
36543688
36553689}
36563690
3657- std::pair<Register, unsigned >
3658- AMDGPUInstructionSelector::selectVOP3ModsImpl (MachineOperand &Root,
3659- bool IsCanonicalizing,
3660- bool AllowAbs, bool OpSel) const {
3661- Register Src = Root.getReg ();
3691+ std::pair<Register, unsigned > AMDGPUInstructionSelector::selectVOP3ModsImpl (
3692+ Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
36623693 unsigned Mods = 0 ;
36633694 MachineInstr *MI = getDefIgnoringCopies (Src, *MRI);
36643695
@@ -3721,7 +3752,7 @@ InstructionSelector::ComplexRendererFns
37213752AMDGPUInstructionSelector::selectVOP3Mods0 (MachineOperand &Root) const {
37223753 Register Src;
37233754 unsigned Mods;
3724- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
3755+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
37253756
37263757 return {{
37273758 [=](MachineInstrBuilder &MIB) {
@@ -3737,7 +3768,7 @@ InstructionSelector::ComplexRendererFns
37373768AMDGPUInstructionSelector::selectVOP3BMods0 (MachineOperand &Root) const {
37383769 Register Src;
37393770 unsigned Mods;
3740- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
3771+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
37413772 /* IsCanonicalizing=*/ true ,
37423773 /* AllowAbs=*/ false );
37433774
@@ -3764,7 +3795,7 @@ InstructionSelector::ComplexRendererFns
37643795AMDGPUInstructionSelector::selectVOP3Mods (MachineOperand &Root) const {
37653796 Register Src;
37663797 unsigned Mods;
3767- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
3798+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
37683799
37693800 return {{
37703801 [=](MachineInstrBuilder &MIB) {
@@ -3779,7 +3810,8 @@ AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
37793810 MachineOperand &Root) const {
37803811 Register Src;
37813812 unsigned Mods;
3782- std::tie (Src, Mods) = selectVOP3ModsImpl (Root, /* IsCanonicalizing=*/ false );
3813+ std::tie (Src, Mods) =
3814+ selectVOP3ModsImpl (Root.getReg (), /* IsCanonicalizing=*/ false );
37833815
37843816 return {{
37853817 [=](MachineInstrBuilder &MIB) {
@@ -3793,8 +3825,9 @@ InstructionSelector::ComplexRendererFns
37933825AMDGPUInstructionSelector::selectVOP3BMods (MachineOperand &Root) const {
37943826 Register Src;
37953827 unsigned Mods;
3796- std::tie (Src, Mods) = selectVOP3ModsImpl (Root, /* IsCanonicalizing=*/ true ,
3797- /* AllowAbs=*/ false );
3828+ std::tie (Src, Mods) =
3829+ selectVOP3ModsImpl (Root.getReg (), /* IsCanonicalizing=*/ true ,
3830+ /* AllowAbs=*/ false );
37983831
37993832 return {{
38003833 [=](MachineInstrBuilder &MIB) {
@@ -4120,7 +4153,7 @@ InstructionSelector::ComplexRendererFns
41204153AMDGPUInstructionSelector::selectVOP3OpSelMods (MachineOperand &Root) const {
41214154 Register Src;
41224155 unsigned Mods;
4123- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
4156+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
41244157
41254158 // FIXME: Handle op_sel
41264159 return {{
@@ -4133,7 +4166,7 @@ InstructionSelector::ComplexRendererFns
41334166AMDGPUInstructionSelector::selectVINTERPMods (MachineOperand &Root) const {
41344167 Register Src;
41354168 unsigned Mods;
4136- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
4169+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
41374170 /* IsCanonicalizing=*/ true ,
41384171 /* AllowAbs=*/ false ,
41394172 /* OpSel=*/ false );
@@ -4151,7 +4184,7 @@ InstructionSelector::ComplexRendererFns
41514184AMDGPUInstructionSelector::selectVINTERPModsHi (MachineOperand &Root) const {
41524185 Register Src;
41534186 unsigned Mods;
4154- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
4187+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
41554188 /* IsCanonicalizing=*/ true ,
41564189 /* AllowAbs=*/ false ,
41574190 /* OpSel=*/ true );
@@ -5333,97 +5366,41 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
53335366 [=](MachineInstrBuilder &MIB) { MIB.addImm (*EncodedOffset); }}};
53345367}
53355368
5336- // Variant of stripBitCast that returns the instruction instead of a
5337- // MachineOperand.
5338- static MachineInstr *stripBitCast (MachineInstr *MI, MachineRegisterInfo &MRI) {
5339- if (MI->getOpcode () == AMDGPU::G_BITCAST)
5340- return getDefIgnoringCopies (MI->getOperand (1 ).getReg (), MRI);
5341- return MI;
5342- }
5343-
5344- // Figure out if this is really an extract of the high 16-bits of a dword,
5345- // returns nullptr if it isn't.
5346- static MachineInstr *isExtractHiElt (MachineInstr *Inst,
5347- MachineRegisterInfo &MRI) {
5348- Inst = stripBitCast (Inst, MRI);
5349-
5350- if (Inst->getOpcode () != AMDGPU::G_TRUNC)
5351- return nullptr ;
5352-
5353- MachineInstr *TruncOp =
5354- getDefIgnoringCopies (Inst->getOperand (1 ).getReg (), MRI);
5355- TruncOp = stripBitCast (TruncOp, MRI);
5356-
5357- // G_LSHR x, (G_CONSTANT i32 16)
5358- if (TruncOp->getOpcode () == AMDGPU::G_LSHR) {
5359- auto SrlAmount = getIConstantVRegValWithLookThrough (
5360- TruncOp->getOperand (2 ).getReg (), MRI);
5361- if (SrlAmount && SrlAmount->Value .getZExtValue () == 16 ) {
5362- MachineInstr *SrlOp =
5363- getDefIgnoringCopies (TruncOp->getOperand (1 ).getReg (), MRI);
5364- return stripBitCast (SrlOp, MRI);
5365- }
5366- }
5367-
5368- // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
5369- // 1, 0 swaps the low/high 16 bits.
5370- // 1, 1 sets the high 16 bits to be the same as the low 16.
5371- // in any case, it selects the high elts.
5372- if (TruncOp->getOpcode () == AMDGPU::G_SHUFFLE_VECTOR) {
5373- assert (MRI.getType (TruncOp->getOperand (0 ).getReg ()) ==
5374- LLT::fixed_vector (2 , 16 ));
5375-
5376- ArrayRef<int > Mask = TruncOp->getOperand (3 ).getShuffleMask ();
5377- assert (Mask.size () == 2 );
5378-
5379- if (Mask[0 ] == 1 && Mask[1 ] <= 1 ) {
5380- MachineInstr *LHS =
5381- getDefIgnoringCopies (TruncOp->getOperand (1 ).getReg (), MRI);
5382- return stripBitCast (LHS, MRI);
5383- }
5384- }
5385-
5386- return nullptr ;
5387- }
5388-
53895369std::pair<Register, unsigned >
53905370AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl (MachineOperand &Root,
53915371 bool &Matched) const {
53925372 Matched = false ;
53935373
53945374 Register Src;
53955375 unsigned Mods;
5396- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
5397-
5398- MachineInstr *MI = getDefIgnoringCopies (Src, *MRI);
5399- if (MI->getOpcode () == AMDGPU::G_FPEXT) {
5400- MachineOperand *MO = &MI->getOperand (1 );
5401- Src = MO->getReg ();
5402- MI = getDefIgnoringCopies (Src, *MRI);
5376+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root.getReg ());
54035377
5378+ if (mi_match (Src, *MRI, m_GFPExt (m_Reg (Src)))) {
54045379 assert (MRI->getType (Src) == LLT::scalar (16 ));
54055380
5406- // See through bitcasts.
5407- // FIXME: Would be nice to use stripBitCast here.
5408- if (MI->getOpcode () == AMDGPU::G_BITCAST) {
5409- MO = &MI->getOperand (1 );
5410- Src = MO->getReg ();
5411- MI = getDefIgnoringCopies (Src, *MRI);
5412- }
5381+ // Only change Src if a source modifier can be gained. In that case the new
5382+ // Src may be an SGPR, but this does not violate the constant bus
5383+ // restriction for the instruction that is being selected.
5384+ // Note: Src is left unchanged when there is only a plain SGPR-to-VGPR copy,
5385+ // since looking through that copy could violate the constant bus restriction.
5386+ Register PeekSrc = stripCopy (Src, *MRI);
54135387
54145388 const auto CheckAbsNeg = [&]() {
54155389 // Be careful about folding modifiers if we already have an abs. fneg is
54165390 // applied last, so we don't want to apply an earlier fneg.
54175391 if ((Mods & SISrcMods::ABS) == 0 ) {
54185392 unsigned ModsTmp;
5419- std::tie (Src, ModsTmp) = selectVOP3ModsImpl (*MO);
5420- MI = getDefIgnoringCopies (Src, *MRI);
5393+ std::tie (PeekSrc, ModsTmp) = selectVOP3ModsImpl (PeekSrc);
54215394
5422- if ((ModsTmp & SISrcMods::NEG) != 0 )
5395+ if ((ModsTmp & SISrcMods::NEG) != 0 ) {
54235396 Mods ^= SISrcMods::NEG;
5397+ Src = PeekSrc;
5398+ }
54245399
5425- if ((ModsTmp & SISrcMods::ABS) != 0 )
5400+ if ((ModsTmp & SISrcMods::ABS) != 0 ) {
54265401 Mods |= SISrcMods::ABS;
5402+ Src = PeekSrc;
5403+ }
54275404 }
54285405 };
54295406
@@ -5436,12 +5413,9 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
54365413
54375414 Mods |= SISrcMods::OP_SEL_1;
54385415
5439- if (MachineInstr *ExtractHiEltMI = isExtractHiElt (MI, *MRI)) {
5416+ if (isExtractHiElt (*MRI, PeekSrc, PeekSrc)) {
5417+ Src = PeekSrc;
54405418 Mods |= SISrcMods::OP_SEL_0;
5441- MI = ExtractHiEltMI;
5442- MO = &MI->getOperand (0 );
5443- Src = MO->getReg ();
5444-
54455419 CheckAbsNeg ();
54465420 }
54475421
0 commit comments