@@ -109,6 +109,23 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
109109 let TSFlags{2} = HasVGPR;
110110 let TSFlags{3} = HasAGPR;
111111 let TSFlags{4} = HasSGPR;
112+
113+ // The register allocator (RA) uses the RegisterClass AllocationPriority, amongst other info (e.g.
114+ // ordering in the basic block), to decide which registers to try to assign first. This priority
115+ // usually carries very high weight, if not the highest, when choosing which VirtReg to allocate next.
116+ //
117+ // We have 5 bits to assign AllocationPriorities to RegisterClasses. Generally, it is beneficial to
118+ // assign more constrained RegisterClasses first. As a result, we prioritize register classes whose
119+ // tuples span more 32-bit registers (e.g. VReg_512) over classes with smaller tuples (e.g. VGPR_32).
120+ //
121+ // The interesting case is the vector register case on architectures which have ARegs, VRegs, AVRegs.
122+ // In this case, we would like to assign ARegs and VRegs before AVRegs, as AVRegs are less constrained
123+ // and can be assigned to both AGPRs and VGPRs. We use the 5th bit to encode this into the
124+ // RegisterClass AllocationPriority. BaseClassPriority is used to turn the bit on, and BaseClassScaleFactor
125+ // is used for scaling of the bit (i.e. 1 << 4).
126+ field int BaseClassPriority = 1;
127+ field int BaseClassScaleFactor = 16;
128+
112129}
113130
114131multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
@@ -575,7 +592,7 @@ let HasVGPR = 1 in {
575592def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
576593 (add (interleave (sequence "VGPR%u_LO16", 0, 255),
577594 (sequence "VGPR%u_HI16", 0, 255)))> {
578- let AllocationPriority = 2 ;
595+ let AllocationPriority = !add(2, !mul(BaseClassPriority, BaseClassScaleFactor)) ;
579596 let Size = 16;
580597 let GeneratePressureSet = 0;
581598
@@ -601,7 +618,7 @@ def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
601618// i16/f16 only on VI+
602619def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
603620 (add (sequence "VGPR%u", 0, 255))> {
604- let AllocationPriority = 0 ;
621+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor)) ;
605622 let Size = 32;
606623 let Weight = 1;
607624 let BaseClassOrder = 32;
@@ -610,7 +627,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
610627// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
611628def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
612629 (add (sequence "VGPR%u", 0, 127))> {
613- let AllocationPriority = 0 ;
630+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor)) ;
614631 let GeneratePressureSet = 0;
615632 let Size = 32;
616633 let Weight = 1;
@@ -668,7 +685,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
668685// AccVGPR 32-bit registers
669686def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
670687 (add (sequence "AGPR%u", 0, 255))> {
671- let AllocationPriority = 0 ;
688+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor)) ;
672689 let Size = 32;
673690 let Weight = 1;
674691 let BaseClassOrder = 32;
@@ -940,14 +957,23 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
940957
941958 // Requires n v_mov_b32 to copy
942959 let CopyCost = numRegs;
943- let AllocationPriority = !sub(numRegs, 1);
960+
961+ // Since we only have 5 bits for the RegisterClass AllocationPriority, and since we use the
962+ // 5th bit for BaseClassPriority, we need to encode the SizePriority into 4 bits. As a result
963+ // of this encoding, for registers with numRegs 15 or 16, we give SizePriority of 14, and for
964+ // registers with numRegs 17+ we give SizePriority of 15. In practice, there is only one
965+ // RegClass per Vector Register type in each of these groups (i.e. numRegs = 15,16 : {VReg_512},
966+ // and numRegs = 17+ : {VReg_1024}). Therefore, we have not lost any info by compressing.
967+ defvar SizePrioriity = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15));
968+
969+ let AllocationPriority = !add(SizePrioriity, !mul(BaseClassPriority, BaseClassScaleFactor));
944970 let Weight = numRegs;
945971}
946972
947973// Define a register tuple class, along with one requiring an even
948974// aligned base register.
949975multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
950- let HasVGPR = 1 in {
976+ let HasVGPR = 1, BaseClassPriority = 1 in {
951977 // Define the regular class.
952978 def "" : VRegClassBase<numRegs, regTypes, regList> {
953979 let BaseClassOrder = !mul(numRegs, 32);
@@ -981,7 +1007,7 @@ defm VReg_1024 : VRegClass<32, Reg1024Types.types, (add VGPR_1024)>;
9811007}
9821008
9831009multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
984- let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
1010+ let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1, BaseClassPriority = 1 in {
9851011 // Define the regular class.
9861012 def "" : VRegClassBase<numRegs, regTypes, regList> {
9871013 let BaseClassOrder = !mul(numRegs, 32);
@@ -1066,6 +1092,7 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6
// 32-bit register class formed from the union of VGPR_32 and AGPR_32; it reuses
// VGPR_32's value types and is flagged as containing both VGPRs and AGPRs.
// BaseClassPriority is overridden to 0 here.
// NOTE(review): no AllocationPriority is assigned in this def, so confirm that
// the BaseClassPriority override actually feeds into a priority for AV_32.
10661092def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
10671093 let HasVGPR = 1;
10681094 let HasAGPR = 1;
1095+ let BaseClassPriority = 0;
10691096 let Size = 32;
10701097}
10711098} // End GeneratePressureSet = 0
@@ -1074,7 +1101,7 @@ def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_3
10741101// aligned base register.
10751102multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
10761103 dag vregList, dag aregList> {
1077- let HasVGPR = 1, HasAGPR = 1 in {
1104+ let HasVGPR = 1, HasAGPR = 1, BaseClassPriority = 0 in {
10781105 // Define the regular class.
10791106 def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
10801107
0 commit comments