@@ -3273,6 +3273,10 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
3273
3273
return AMDGPUInstPrinter::getRegisterName (Reg);
3274
3274
}
3275
3275
3276
+ unsigned SIRegisterInfo::getHWRegIndex (MCRegister Reg) const {
3277
+ return getEncodingValue (Reg) & AMDGPU::HWEncoding::REG_IDX_MASK;
3278
+ }
3279
+
3276
3280
/// Return the size in bits of registers belonging to class \p RC, delegating
/// to the register-class-ID overload of getRegBitWidth.
unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
@@ -3353,6 +3357,40 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
3353
3357
: getAnyVGPRClassForBitWidth (BitWidth);
3354
3358
}
3355
3359
3360
+ const TargetRegisterClass *
3361
+ SIRegisterInfo::getAlignedLo256VGPRClassForBitWidth (unsigned BitWidth) const {
3362
+ if (BitWidth <= 32 )
3363
+ return &AMDGPU::VGPR_32_Lo256RegClass;
3364
+ if (BitWidth <= 64 )
3365
+ return &AMDGPU::VReg_64_Lo256_Align2RegClass;
3366
+ if (BitWidth <= 96 )
3367
+ return &AMDGPU::VReg_96_Lo256_Align2RegClass;
3368
+ if (BitWidth <= 128 )
3369
+ return &AMDGPU::VReg_128_Lo256_Align2RegClass;
3370
+ if (BitWidth <= 160 )
3371
+ return &AMDGPU::VReg_160_Lo256_Align2RegClass;
3372
+ if (BitWidth <= 192 )
3373
+ return &AMDGPU::VReg_192_Lo256_Align2RegClass;
3374
+ if (BitWidth <= 224 )
3375
+ return &AMDGPU::VReg_224_Lo256_Align2RegClass;
3376
+ if (BitWidth <= 256 )
3377
+ return &AMDGPU::VReg_256_Lo256_Align2RegClass;
3378
+ if (BitWidth <= 288 )
3379
+ return &AMDGPU::VReg_288_Lo256_Align2RegClass;
3380
+ if (BitWidth <= 320 )
3381
+ return &AMDGPU::VReg_320_Lo256_Align2RegClass;
3382
+ if (BitWidth <= 352 )
3383
+ return &AMDGPU::VReg_352_Lo256_Align2RegClass;
3384
+ if (BitWidth <= 384 )
3385
+ return &AMDGPU::VReg_384_Lo256_Align2RegClass;
3386
+ if (BitWidth <= 512 )
3387
+ return &AMDGPU::VReg_512_Lo256_Align2RegClass;
3388
+ if (BitWidth <= 1024 )
3389
+ return &AMDGPU::VReg_1024_Lo256_Align2RegClass;
3390
+
3391
+ return nullptr ;
3392
+ }
3393
+
3356
3394
static const TargetRegisterClass *
3357
3395
getAnyAGPRClassForBitWidth (unsigned BitWidth) {
3358
3396
if (BitWidth == 64 )
@@ -3547,7 +3585,17 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
3547
3585
/// Return the allocatable VGPR class with the same size (in bits) as \p SRC.
///
/// The Lo256 VS classes are special-cased: they must map onto the aligned
/// Lo256 VGPR classes rather than the full VGPR classes, since their virtual
/// registers are restricted to the low 256 VGPRs.
const TargetRegisterClass *
SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
  const unsigned Size = getRegSizeInBits(*SRC);

  const unsigned ClassID = SRC->getID();
  if (ClassID == AMDGPU::VS_32_Lo256RegClassID ||
      ClassID == AMDGPU::VS_64_Lo256RegClassID)
    return getAllocatableClass(getAlignedLo256VGPRClassForBitWidth(Size));

  const TargetRegisterClass *VRC =
      getAllocatableClass(getVGPRClassForBitWidth(Size));
  assert(VRC && "Invalid register class size");
  return VRC;
}
@@ -4005,7 +4053,12 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
4005
4053
unsigned SIRegisterInfo::getNumUsedPhysRegs (const MachineRegisterInfo &MRI,
4006
4054
const TargetRegisterClass &RC,
4007
4055
bool IncludeCalls) const {
4008
- for (MCPhysReg Reg : reverse (RC.getRegisters ()))
4056
+ unsigned NumArchVGPRs = ST.has1024AddressableVGPRs () ? 1024 : 256 ;
4057
+ ArrayRef<MCPhysReg> Registers =
4058
+ (RC.getID () == AMDGPU::VGPR_32RegClassID)
4059
+ ? RC.getRegisters ().take_front (NumArchVGPRs)
4060
+ : RC.getRegisters ();
4061
+ for (MCPhysReg Reg : reverse (Registers))
4009
4062
if (MRI.isPhysRegUsed (Reg, /* SkipRegMaskTest=*/ !IncludeCalls))
4010
4063
return getHWRegIndex (Reg) + 1 ;
4011
4064
return 0 ;
0 commit comments