@@ -3273,6 +3273,10 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
   return AMDGPUInstPrinter::getRegisterName(Reg);
 }
 
+unsigned SIRegisterInfo::getHWRegIndex(MCRegister Reg) const {
+  return getEncodingValue(Reg) & AMDGPU::HWEncoding::REG_IDX_MASK;
+}
+
 unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) {
   return getRegBitWidth(RC.getID());
 }
@@ -3353,6 +3357,40 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
              : getAnyVGPRClassForBitWidth(BitWidth);
 }
 
+const TargetRegisterClass *
+SIRegisterInfo::getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const {
+  if (BitWidth <= 32)
+    return &AMDGPU::VGPR_32_Lo256RegClass;
+  if (BitWidth <= 64)
+    return &AMDGPU::VReg_64_Lo256_Align2RegClass;
+  if (BitWidth <= 96)
+    return &AMDGPU::VReg_96_Lo256_Align2RegClass;
+  if (BitWidth <= 128)
+    return &AMDGPU::VReg_128_Lo256_Align2RegClass;
+  if (BitWidth <= 160)
+    return &AMDGPU::VReg_160_Lo256_Align2RegClass;
+  if (BitWidth <= 192)
+    return &AMDGPU::VReg_192_Lo256_Align2RegClass;
+  if (BitWidth <= 224)
+    return &AMDGPU::VReg_224_Lo256_Align2RegClass;
+  if (BitWidth <= 256)
+    return &AMDGPU::VReg_256_Lo256_Align2RegClass;
+  if (BitWidth <= 288)
+    return &AMDGPU::VReg_288_Lo256_Align2RegClass;
+  if (BitWidth <= 320)
+    return &AMDGPU::VReg_320_Lo256_Align2RegClass;
+  if (BitWidth <= 352)
+    return &AMDGPU::VReg_352_Lo256_Align2RegClass;
+  if (BitWidth <= 384)
+    return &AMDGPU::VReg_384_Lo256_Align2RegClass;
+  if (BitWidth <= 512)
+    return &AMDGPU::VReg_512_Lo256_Align2RegClass;
+  if (BitWidth <= 1024)
+    return &AMDGPU::VReg_1024_Lo256_Align2RegClass;
+
+  return nullptr;
+}
+
 static const TargetRegisterClass *
 getAnyAGPRClassForBitWidth(unsigned BitWidth) {
   if (BitWidth == 64)
@@ -3547,7 +3585,17 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
 const TargetRegisterClass *
 SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
   unsigned Size = getRegSizeInBits(*SRC);
-  const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
+
+  switch (SRC->getID()) {
+  default:
+    break;
+  case AMDGPU::VS_32_Lo256RegClassID:
+  case AMDGPU::VS_64_Lo256RegClassID:
+    return getAllocatableClass(getAlignedLo256VGPRClassForBitWidth(Size));
+  }
+
+  const TargetRegisterClass *VRC =
+      getAllocatableClass(getVGPRClassForBitWidth(Size));
   assert(VRC && "Invalid register class size");
   return VRC;
 }
@@ -4005,7 +4053,12 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
 unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
                                             const TargetRegisterClass &RC,
                                             bool IncludeCalls) const {
-  for (MCPhysReg Reg : reverse(RC.getRegisters()))
+  unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
+  ArrayRef<MCPhysReg> Registers =
+      (RC.getID() == AMDGPU::VGPR_32RegClassID)
+          ? RC.getRegisters().take_front(NumArchVGPRs)
+          : RC.getRegisters();
+  for (MCPhysReg Reg : reverse(Registers))
     if (MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/!IncludeCalls))
       return getHWRegIndex(Reg) + 1;
   return 0;