Skip to content

Commit 72aa946

Browse files
authored
[AMDGPU] Drop high 32 bits of aperture registers (#158725)
Fixes: SWDEV-551181
1 parent bc931a5 commit 72aa946

11 files changed

+157
-129
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -913,7 +913,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
913913
return;
914914
}
915915

916-
if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
916+
if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
917917
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
918918
return;
919919
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 24 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -228,16 +228,12 @@ def SGPR_NULL64 :
228228
// need them, we need to do a 64 bit load and extract the bits manually.
229229
multiclass ApertureRegister<string name, bits<10> regIdx> {
230230
let isConstant = true in {
231-
// FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
232-
// register classes), but if we don't it seems to confuse the TableGen
233-
// backend and we end up with a lot of weird register pressure sets and classes.
234231
defm _LO : SIRegLoHi16 <name, regIdx>;
235-
defm _HI : SIRegLoHi16 <"", regIdx>;
236-
237-
def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO), !cast<Register>(NAME#_HI)]> {
232+
def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO)]> {
238233
let Namespace = "AMDGPU";
239-
let SubRegIndices = [sub0, sub1];
234+
let SubRegIndices = [sub0];
240235
let HWEncoding = !cast<Register>(NAME#_LO).HWEncoding;
236+
let CoveredBySubRegs = 0;
241237
}
242238
} // isConstant = true
243239
}
@@ -790,8 +786,7 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
790786
def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
791787
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
792788
SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
793-
SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
794-
SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
789+
SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_POPS_EXITING_WAVE_ID,
795790
SRC_VCCZ, SRC_EXECZ, SRC_SCC, SRC_FLAT_SCRATCH_BASE_LO, SRC_FLAT_SCRATCH_BASE_HI)> {
796791
let AllocationPriority = 0;
797792
}
@@ -801,10 +796,9 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
801796
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
802797
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
803798
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
804-
SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
805-
SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
806-
SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16,
807-
SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
799+
SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16,
800+
EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
801+
SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
808802
let Size = 16;
809803
let isAllocatable = 0;
810804
let BaseClassOrder = 16;
@@ -825,6 +819,13 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
825819
let AllocationPriority = 0;
826820
}
827821

822+
def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32,
823+
(add SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
824+
let isAllocatable = 0;
825+
let Size = 64;
826+
let BaseClassOrder = 10000;
827+
}
828+
828829
} // End GeneratePressureSet = 0
829830

830831
// Register class for all scalar registers (SGPRs + Special Registers)
@@ -876,8 +877,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
876877
}
877878

878879
def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
879-
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
880-
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA,
880+
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA,
881881
SRC_FLAT_SCRATCH_BASE)> {
882882
let CopyCost = 1;
883883
let AllocationPriority = 1;
@@ -900,6 +900,14 @@ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f1
900900
let Size = 64;
901901
}
902902

903+
def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
904+
(add SReg_64, APERTURE_Class)> {
905+
let CopyCost = 1;
906+
let isAllocatable = 0;
907+
let HasSGPR = 1;
908+
let Size = 64;
909+
}
910+
903911
def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
904912
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
905913
let CopyCost = 1;
@@ -1225,7 +1233,7 @@ def SSrc_bf16: SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_BF16">;
12251233
def SSrc_f16 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP16">;
12261234
def SSrc_b32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_INT32">;
12271235
def SSrc_f32 : SrcRegOrImm9 <SReg_32, "OPERAND_REG_IMM_FP32">;
1228-
def SSrc_b64 : SrcRegOrImm9 <SReg_64, "OPERAND_REG_IMM_INT64">;
1236+
def SSrc_b64 : SrcRegOrImm9 <SReg_64_Encodable, "OPERAND_REG_IMM_INT64">;
12291237

12301238
def SSrcOrLds_b32 : SrcRegOrImm9 <SRegOrLds_32, "OPERAND_REG_IMM_INT32">;
12311239

0 commit comments

Comments
 (0)