@@ -228,16 +228,12 @@ def SGPR_NULL64 :
228228// need them, we need to do a 64 bit load and extract the bits manually.
229229multiclass ApertureRegister<string name, bits<10> regIdx> {
230230 let isConstant = true in {
231- // FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
232- // register classes), but if we don't it seems to confuse the TableGen
233- // backend and we end up with a lot of weird register pressure sets and classes.
234231 defm _LO : SIRegLoHi16 <name, regIdx>;
235- defm _HI : SIRegLoHi16 <"", regIdx>;
236-
237- def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO), !cast<Register>(NAME#_HI)]> {
232+ def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO)]> { // Only the NAME#_LO register is listed as a sub-register.
238233 let Namespace = "AMDGPU";
239- let SubRegIndices = [sub0, sub1 ];
234+ let SubRegIndices = [sub0]; // sub0 indexes NAME#_LO; no sub1 entry is defined.
240235 let HWEncoding = !cast<Register>(NAME#_LO).HWEncoding;
236+ let CoveredBySubRegs = 0; // Marks the register as not fully described by its sub-registers (only sub0 is listed).
241237 }
242238 } // isConstant = true
243239}
@@ -790,8 +786,7 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
790786def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
791787 (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
792788 SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
793- SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
794- SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
789+ SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_POPS_EXITING_WAVE_ID, // Only the *_LO aperture registers are members; the *_HI ones are not listed.
795790 SRC_VCCZ, SRC_EXECZ, SRC_SCC, SRC_FLAT_SCRATCH_BASE_LO, SRC_FLAT_SCRATCH_BASE_HI)> {
796791 let AllocationPriority = 0;
797792}
@@ -801,10 +796,9 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
801796 XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
802797 TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
803798 SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
804- SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
805- SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
806- SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16,
807- SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
799+ SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16,
800+ EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
801+ SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
808802 let Size = 16;
809803 let isAllocatable = 0;
810804 let BaseClassOrder = 16;
@@ -825,6 +819,13 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
825819 let AllocationPriority = 0;
826820}
827821
822+ def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32, // Register class containing only the four aperture registers.
823+ (add SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
824+ let isAllocatable = 0; // Excluded from register allocation.
825+ let Size = 64;
826+ let BaseClassOrder = 10000; // NOTE(review): presumably a large value so other base classes take precedence -- confirm.
827+ }
828+
828829} // End GeneratePressureSet = 0
829830
830831// Register class for all scalar registers (SGPRs + Special Registers)
@@ -876,8 +877,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
876877}
877878
878879def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
879- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
880- SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA,
880+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA,
881881 SRC_FLAT_SCRATCH_BASE)> {
882882 let CopyCost = 1;
883883 let AllocationPriority = 1;
@@ -900,6 +900,14 @@ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f1
900900 let Size = 64;
901901}
902902
903+ def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32, // Union of SReg_64 and APERTURE_Class.
904+ (add SReg_64, APERTURE_Class)> {
905+ let CopyCost = 1;
906+ let isAllocatable = 0; // Excluded from register allocation; used as the SSrc_b64 operand class.
907+ let HasSGPR = 1;
908+ let Size = 64;
909+ }
910+
903911def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
904912 (add SReg_64_XEXEC, SReg_32_XEXEC)> {
905913 let CopyCost = 1;
@@ -1225,7 +1233,7 @@ def SSrc_bf16: SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_BF16">;
12251233def SSrc_f16 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP16">;
12261234def SSrc_b32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_INT32">;
12271235def SSrc_f32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP32">;
1228- def SSrc_b64 : SrcRegOrImm9 <SReg_64 , "OPERAND_REG_IMM_INT64">;
1236+ def SSrc_b64 : SrcRegOrImm9 <SReg_64_Encodable , "OPERAND_REG_IMM_INT64">; // SReg_64_Encodable is SReg_64 plus the APERTURE_Class registers.
12291237
12301238def SSrcOrLds_b32 : SrcRegOrImm9 <SRegOrLds_32, "OPERAND_REG_IMM_INT32">;
12311239
0 commit comments