@@ -60,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
6060 return MRI.getType (Reg) == LLT::pointer (4 , 64 );
6161 case P5:
6262 return MRI.getType (Reg) == LLT::pointer (5 , 32 );
63+ case V2S32:
64+ return MRI.getType (Reg) == LLT::fixed_vector (2 , 32 );
6365 case V4S32:
6466 return MRI.getType (Reg) == LLT::fixed_vector (4 , 32 );
6567 case B32:
@@ -92,6 +94,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
9294 return MRI.getType (Reg) == LLT::pointer (4 , 64 ) && MUI.isUniform (Reg);
9395 case UniP5:
9496 return MRI.getType (Reg) == LLT::pointer (5 , 32 ) && MUI.isUniform (Reg);
97+ case UniV2S16:
98+ return MRI.getType (Reg) == LLT::fixed_vector (2 , 16 ) && MUI.isUniform (Reg);
9599 case UniB32:
96100 return MRI.getType (Reg).getSizeInBits () == 32 && MUI.isUniform (Reg);
97101 case UniB64:
@@ -122,6 +126,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
122126 return MRI.getType (Reg) == LLT::pointer (4 , 64 ) && MUI.isDivergent (Reg);
123127 case DivP5:
124128 return MRI.getType (Reg) == LLT::pointer (5 , 32 ) && MUI.isDivergent (Reg);
129+ case DivV2S16:
130+ return MRI.getType (Reg) == LLT::fixed_vector (2 , 16 ) && MUI.isDivergent (Reg);
125131 case DivB32:
126132 return MRI.getType (Reg).getSizeInBits () == 32 && MUI.isDivergent (Reg);
127133 case DivB64:
@@ -435,7 +441,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
435441 MachineRegisterInfo &_MRI)
436442 : ST(&_ST), MRI(&_MRI) {
437443
438- addRulesForGOpcs ({G_ADD}, Standard)
444+ addRulesForGOpcs ({G_ADD, G_SUB }, Standard)
439445 .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
440446 .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
441447
@@ -452,11 +458,36 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
452458 .Div (B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
453459
454460 addRulesForGOpcs ({G_SHL}, Standard)
461+ .Uni (S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
462+ .Div (S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
463+ .Uni (V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
464+ .Div (V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
465+ .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
466+ .Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
455467 .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
468+ .Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
469+
470+ addRulesForGOpcs ({G_LSHR}, Standard)
471+ .Uni (S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
472+ .Div (S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
473+ .Uni (V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
474+ .Div (V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
475+ .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
456476 .Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
477+ .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
457478 .Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
458479
459- addRulesForGOpcs ({G_LSHR}, Standard).Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}});
480+ addRulesForGOpcs ({G_ASHR}, Standard)
481+ .Uni (S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
482+ .Div (S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
483+ .Uni (V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
484+ .Div (V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
485+ .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
486+ .Uni (S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
487+ .Div (S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
488+ .Div (S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
489+
490+ addRulesForGOpcs ({G_FRAME_INDEX}).Any ({{UniP5, _}, {{SgprP5}, {None}}});
460491
461492 addRulesForGOpcs ({G_UBFX, G_SBFX}, Standard)
462493 .Uni (S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
@@ -515,6 +546,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
515546 .Any ({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
516547 .Any ({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
517548 .Any ({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
549+ .Any ({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
550+ .Any ({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
518551 // This is non-trivial. VgprToVccCopy is done using compare instruction.
519552 .Any ({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
520553 .Any ({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
@@ -550,6 +583,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
550583 .Any ({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
551584 .Any ({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});
552585
586+ addRulesForGOpcs ({G_SEXT_INREG})
587+ .Any ({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
588+ .Any ({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
589+ .Any ({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
590+ .Any ({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});
591+
553592 bool hasUnalignedLoads = ST->getGeneration () >= AMDGPUSubtarget::GFX12;
554593 bool hasSMRDSmall = ST->hasScalarSubwordLoads ();
555594
0 commit comments