@@ -3273,6 +3273,10 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
   return AMDGPUInstPrinter::getRegisterName(Reg);
 }
 
+unsigned SIRegisterInfo::getHWRegIndex(MCRegister Reg) const {
+  return getEncodingValue(Reg) & AMDGPU::HWEncoding::REG_IDX_MASK;
+}
+
 unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) {
   return getRegBitWidth(RC.getID());
 }
@@ -3353,6 +3357,40 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
                                 : getAnyVGPRClassForBitWidth(BitWidth);
 }
 
+const TargetRegisterClass *
+SIRegisterInfo::getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const {
+  if (BitWidth <= 32)
+    return &AMDGPU::VGPR_32_Lo256RegClass;
+  if (BitWidth <= 64)
+    return &AMDGPU::VReg_64_Lo256_Align2RegClass;
+  if (BitWidth <= 96)
+    return &AMDGPU::VReg_96_Lo256_Align2RegClass;
+  if (BitWidth <= 128)
+    return &AMDGPU::VReg_128_Lo256_Align2RegClass;
+  if (BitWidth <= 160)
+    return &AMDGPU::VReg_160_Lo256_Align2RegClass;
+  if (BitWidth <= 192)
+    return &AMDGPU::VReg_192_Lo256_Align2RegClass;
+  if (BitWidth <= 224)
+    return &AMDGPU::VReg_224_Lo256_Align2RegClass;
+  if (BitWidth <= 256)
+    return &AMDGPU::VReg_256_Lo256_Align2RegClass;
+  if (BitWidth <= 288)
+    return &AMDGPU::VReg_288_Lo256_Align2RegClass;
+  if (BitWidth <= 320)
+    return &AMDGPU::VReg_320_Lo256_Align2RegClass;
+  if (BitWidth <= 352)
+    return &AMDGPU::VReg_352_Lo256_Align2RegClass;
+  if (BitWidth <= 384)
+    return &AMDGPU::VReg_384_Lo256_Align2RegClass;
+  if (BitWidth <= 512)
+    return &AMDGPU::VReg_512_Lo256_Align2RegClass;
+  if (BitWidth <= 1024)
+    return &AMDGPU::VReg_1024_Lo256_Align2RegClass;
+
+  return nullptr;
+}
+
 static const TargetRegisterClass *
 getAnyAGPRClassForBitWidth(unsigned BitWidth) {
   if (BitWidth == 64)
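Illustrative aside (freestanding sketch, not part of the patch): getAlignedLo256VGPRClassForBitWidth() rounds the requested bit width up to the narrowest Lo256 register class that can hold it and returns nullptr when no class is wide enough. The same round-up-to-a-supported-width pattern, over the widths used above, with the class pointers replaced by plain numbers:

#include <array>
#include <cassert>
#include <optional>

// Supported class widths, mirroring the cases in the patch (note the gap
// between 384 and 512, and the jump from 512 to 1024).
constexpr std::array<unsigned, 14> Widths = {32,  64,  96,  128, 160, 192, 224,
                                             256, 288, 320, 352, 384, 512, 1024};

// Return the narrowest supported width >= BitWidth, or nothing if too wide.
std::optional<unsigned> roundUpToClassWidth(unsigned BitWidth) {
  for (unsigned W : Widths)
    if (BitWidth <= W)
      return W;
  return std::nullopt;
}

int main() {
  assert(roundUpToClassWidth(48) == 64);   // 48 bits need a 64-bit class
  assert(roundUpToClassWidth(400) == 512); // widths between 384 and 512 round up
  assert(!roundUpToClassWidth(2048));      // nothing wide enough -> nullptr analogue
  return 0;
}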
@@ -3547,7 +3585,17 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
 const TargetRegisterClass *
 SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
   unsigned Size = getRegSizeInBits(*SRC);
-  const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
+
+  switch (SRC->getID()) {
+  default:
+    break;
+  case AMDGPU::VS_32_Lo256RegClassID:
+  case AMDGPU::VS_64_Lo256RegClassID:
+    return getAllocatableClass(getAlignedLo256VGPRClassForBitWidth(Size));
+  }
+
+  const TargetRegisterClass *VRC =
+      getAllocatableClass(getVGPRClassForBitWidth(Size));
   assert(VRC && "Invalid register class size");
   return VRC;
 }
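Illustrative aside (hypothetical names, not the generated register class IDs): the new switch in getEquivalentVGPRClass() routes the Lo256 VS_32/VS_64 operand classes to the size-matched Lo256 VGPR class and lets everything else fall through to the ordinary size-based lookup. A freestanding sketch of that dispatch shape, with strings standing in for register classes:

#include <cassert>
#include <string>

// Hypothetical class IDs; the real ones are generated by TableGen.
enum ClassID { VS_32_Lo256, VS_64_Lo256, SGPR_32, VReg_64 };

std::string vgprClassForSize(unsigned Size) {
  return Size <= 32 ? "VGPR_32" : "VReg_" + std::to_string(Size);
}
std::string lo256VGPRClassForSize(unsigned Size) {
  return Size <= 32 ? "VGPR_32_Lo256" : "VReg_" + std::to_string(Size) + "_Lo256_Align2";
}

// Equivalent-VGPR lookup: special-case the Lo256 VS classes, then fall back
// to the generic size-based mapping.
std::string equivalentVGPRClass(ClassID ID, unsigned Size) {
  switch (ID) {
  default:
    break;
  case VS_32_Lo256:
  case VS_64_Lo256:
    return lo256VGPRClassForSize(Size);
  }
  return vgprClassForSize(Size);
}

int main() {
  assert(equivalentVGPRClass(VS_64_Lo256, 64) == "VReg_64_Lo256_Align2");
  assert(equivalentVGPRClass(SGPR_32, 32) == "VGPR_32");
  return 0;
}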
@@ -4005,7 +4053,12 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
 unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
                                             const TargetRegisterClass &RC,
                                             bool IncludeCalls) const {
-  for (MCPhysReg Reg : reverse(RC.getRegisters()))
+  unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
+  ArrayRef<MCPhysReg> Registers =
+      (RC.getID() == AMDGPU::VGPR_32RegClassID)
+          ? RC.getRegisters().take_front(NumArchVGPRs)
+          : RC.getRegisters();
+  for (MCPhysReg Reg : reverse(Registers))
     if (MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/!IncludeCalls))
       return getHWRegIndex(Reg) + 1;
   return 0;
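Illustrative aside (freestanding sketch with a toy "used" bitmap): for the VGPR_32 class, the scan is now clamped to the architectural VGPR range (1024 registers when the subtarget reports has1024AddressableVGPRs(), otherwise 256) before the list is walked in reverse for the highest used register, whose hardware index plus one is returned:

#include <algorithm>
#include <cassert>
#include <vector>

// Reverse-scan a register "used" bitmap, clamped to the addressable range, and
// return the highest used index plus one (0 if nothing in range is used).
unsigned numUsedRegs(const std::vector<bool> &Used, unsigned NumAddressable) {
  unsigned Limit = std::min<unsigned>(Used.size(), NumAddressable);
  for (unsigned I = Limit; I > 0; --I)
    if (Used[I - 1])
      return I; // index of the highest used register, plus one
  return 0;
}

int main() {
  std::vector<bool> Used(1024, false);
  Used[3] = true;
  Used[17] = true;  // low registers in use
  Used[700] = true; // a register above the 256-register architectural range
  assert(numUsedRegs(Used, 256) == 18);   // clamped scan ignores index 700
  assert(numUsedRegs(Used, 1024) == 701); // with 1024 addressable registers it counts
  return 0;
}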