@@ -228,16 +228,12 @@ def SGPR_NULL64 :
228
228
// need them, we need to do a 64 bit load and extract the bits manually.
229
229
multiclass ApertureRegister<string name, bits<10> regIdx> {
230
230
let isConstant = true in {
231
- // FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
232
- // register classes), but if we don't it seems to confuse the TableGen
233
- // backend and we end up with a lot of weird register pressure sets and classes.
234
231
defm _LO : SIRegLoHi16 <name, regIdx>;
235
- defm _HI : SIRegLoHi16 <"", regIdx>;
236
-
237
- def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO), !cast<Register>(NAME#_HI)]> {
232
+ def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO)]> {
238
233
let Namespace = "AMDGPU";
239
- let SubRegIndices = [sub0, sub1 ];
234
+ let SubRegIndices = [sub0];
240
235
let HWEncoding = !cast<Register>(NAME#_LO).HWEncoding;
236
+ let CoveredBySubRegs = 0;
241
237
}
242
238
} // isConstant = true
243
239
}
@@ -790,8 +786,7 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
790
786
def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
791
787
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
792
788
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
793
- SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
794
- SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
789
+ SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_POPS_EXITING_WAVE_ID,
795
790
SRC_VCCZ, SRC_EXECZ, SRC_SCC, SRC_FLAT_SCRATCH_BASE_LO, SRC_FLAT_SCRATCH_BASE_HI)> {
796
791
let AllocationPriority = 0;
797
792
}
@@ -801,10 +796,9 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
801
796
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
802
797
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
803
798
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
804
- SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
805
- SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
806
- SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16,
807
- SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
799
+ SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16,
800
+ EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
801
+ SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
808
802
let Size = 16;
809
803
let isAllocatable = 0;
810
804
let BaseClassOrder = 16;
@@ -825,6 +819,13 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
825
819
let AllocationPriority = 0;
826
820
}
827
821
822
+ def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32,
823
+ (add SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
824
+ let isAllocatable = 0;
825
+ let Size = 64;
826
+ let BaseClassOrder = 10000;
827
+ }
828
+
828
829
} // End GeneratePressureSet = 0
829
830
830
831
// Register class for all scalar registers (SGPRs + Special Registers)
@@ -876,8 +877,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
876
877
}
877
878
878
879
def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
879
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
880
- SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA,
880
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA,
881
881
SRC_FLAT_SCRATCH_BASE)> {
882
882
let CopyCost = 1;
883
883
let AllocationPriority = 1;
@@ -900,6 +900,14 @@ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f1
900
900
let Size = 64;
901
901
}
902
902
903
+ def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
904
+ (add SReg_64, APERTURE_Class)> {
905
+ let CopyCost = 1;
906
+ let isAllocatable = 0;
907
+ let HasSGPR = 1;
908
+ let Size = 64;
909
+ }
910
+
903
911
def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
904
912
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
905
913
let CopyCost = 1;
@@ -1225,7 +1233,7 @@ def SSrc_bf16: SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_BF16">;
1225
1233
def SSrc_f16 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP16">;
1226
1234
def SSrc_b32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_INT32">;
1227
1235
def SSrc_f32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP32">;
1228
- def SSrc_b64 : SrcRegOrImm9 <SReg_64 , "OPERAND_REG_IMM_INT64">;
1236
+ def SSrc_b64 : SrcRegOrImm9 <SReg_64_Encodable , "OPERAND_REG_IMM_INT64">;
1229
1237
1230
1238
def SSrcOrLds_b32 : SrcRegOrImm9 <SRegOrLds_32, "OPERAND_REG_IMM_INT32">;
1231
1239
0 commit comments