Skip to content

Commit d9790e0

Browse files
committed
[AMDGPU] Prioritize allocation of low 256 VGPR classes
If we have 1024 VGPRs available, we need to give allocation priority to the register classes whose operands can only use the low 256 registers. These are notably the scale operands of V_WMMA_SCALE instructions. Otherwise, large tuples will be allocated first and take all the low registers, so we would have to spill to make room for these scale registers. Allocation priority by itself does not eliminate spilling completely in large kernels, although it helps to some degree. Increasing the spill weight of a restricted class on top of it also helps.
1 parent d4c8cfe commit d9790e0

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,17 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
501501

502502
SmallVector<StringLiteral>
503503
getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
504+
505+
float
506+
getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override {
507+
// Prioritize VGPR_32_Lo256 over other classes which may occupy registers
508+
// beyond v256.
509+
return AMDGPUGenRegisterInfo::getSpillWeightScaleFactor(RC) *
510+
((RC == &AMDGPU::VGPR_32_Lo256RegClass ||
511+
RC == &AMDGPU::VReg_64_Lo256_Align2RegClass)
512+
? 2.0
513+
: 1.0);
514+
}
504515
};
505516

506517
namespace AMDGPU {

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,7 @@ def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg1
644644
// Identical to VGPR_32 except it only contains the low 256 (Lo256) registers.
645645
def VGPR_32_Lo256 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
646646
(add (sequence "VGPR%u", 0, 255))> {
647-
let AllocationPriority = 0;
647+
let AllocationPriority = !add(3, !mul(BaseClassPriority, BaseClassScaleFactor));
648648
let GeneratePressureSet = 0;
649649
let Size = 32;
650650
let Weight = 1;

0 commit comments

Comments
 (0)