Skip to content

Commit 2bf9278

Browse files
authored
AMDGPU: Start using RegClassByHwMode for wavesize operands
(#159884) This eliminates the pseudo register classes used to hack the wave register class, which are now replaced with RegClassByHwMode, so most of the diff is from register class ID renumbering.
1 parent 196ea57 commit 2bf9278

36 files changed

+672
-603
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2976,15 +2976,46 @@ def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
29762976
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
29772977
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;
29782978

2979+
def NotNeedsAlignedVGPRs : Predicate<"!Subtarget->needsAlignedVGPRs()">,
2980+
AssemblerPredicate<(all_of (not FeatureRequiresAlignedVGPRs))>;
2981+
2982+
def isWave32 : Predicate<"Subtarget->isWave32()">,
2983+
AssemblerPredicate <(any_of FeatureWavefrontSize32,
2984+
FeatureAssemblerPermissiveWavesize)>;
2985+
def isWave64 : Predicate<"Subtarget->isWave64()">,
2986+
AssemblerPredicate <(any_of FeatureWavefrontSize64,
2987+
FeatureAssemblerPermissiveWavesize)>;
2988+
2989+
def isWave32Strict : Predicate<"Subtarget->isWave32()">,
2990+
AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
2991+
def isWave64Strict : Predicate<"Subtarget->isWave64()">,
2992+
AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
2993+
29792994
//===----------------------------------------------------------------------===//
29802995
// HwModes
29812996
//===----------------------------------------------------------------------===//
29822997

2983-
// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement
2998+
defvar DefaultMode_Wave64 = DefaultMode;
2999+
defvar DefaultMode_Wave32 = HwMode<[isWave32, NotNeedsAlignedVGPRs]>;
3000+
3001+
// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement. Implied
3002+
// wave64.
29843003
def AVAlign2LoadStoreMode : HwMode<[HasMAIInsts, NeedsAlignedVGPRs]>;
29853004

29863005
// gfx1250, has alignment requirement but no AGPRs.
2987-
def AlignedVGPRNoAGPRMode : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs]>;
3006+
def AlignedVGPRNoAGPRMode_Wave32 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave32Strict]>;
3007+
def AlignedVGPRNoAGPRMode_Wave64 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave64Strict]>;
3008+
3009+
// FIXME: This should be able to only define a separate hwmode that
3010+
// only depends on wavesize for just ValueTypes. These use different
3011+
// HwMode namespaces. If we don't define the full set of modes used
3012+
// for RegClassByHwMode, tablegen crashes for some reason.
3013+
def WaveSizeVT : ValueTypeByHwMode<[
3014+
DefaultMode_Wave64,
3015+
AVAlign2LoadStoreMode,
3016+
AlignedVGPRNoAGPRMode_Wave64,
3017+
DefaultMode_Wave32,
3018+
AlignedVGPRNoAGPRMode_Wave32], [i64, i64, i64, i32, i32]>;
29883019

29893020

29903021
// Include AMDGPU TD files

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,6 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
287287
const RegisterBank &
288288
AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
289289
LLT Ty) const {
290-
if (&RC == &AMDGPU::SReg_1RegClass)
291-
return AMDGPU::VCCRegBank;
292-
293290
// We promote real scalar booleans to SReg_32. Any SGPR using s1 is really a
294291
// VCC-like use.
295292
if (TRI->isSGPRClass(&RC)) {

llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
1515
>;
1616

1717
// It is helpful to distinguish conditions from ordinary SGPRs.
18-
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
18+
def VCCRegBank : RegisterBank<"VCC", [SReg_32, SReg_64]>;
1919

2020
def AGPRRegBank : RegisterBank <"AGPR",
2121
[AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_288, AReg_320, AReg_352, AReg_384, AReg_512, AReg_1024]

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -795,14 +795,24 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
795795
// Intention: print disassembler message when invalid code is decoded,
796796
// for example sgpr register used in VReg or VISrc(VReg or imm) operand.
797797
const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
798-
int16_t RCID = MII.getOpRegClassID(
799-
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
800-
if (RCID != -1) {
798+
if (OpInfo.RegClass != -1) {
799+
int16_t RCID = MII.getOpRegClassID(
800+
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
801801
const MCRegisterClass &RC = MRI.getRegClass(RCID);
802802
auto Reg = mc2PseudoReg(Op.getReg());
803803
if (!RC.contains(Reg) && !isInlineValue(Reg)) {
804-
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
805-
<< "\' register class*/";
804+
bool IsWaveSizeOp = OpInfo.isLookupRegClassByHwMode() &&
805+
(OpInfo.RegClass == AMDGPU::SReg_1 ||
806+
OpInfo.RegClass == AMDGPU::SReg_1_XEXEC);
807+
// Suppress this comment for a mismatched wavesize. Some users expect to
808+
// be able to assemble and disassemble modules with mixed wavesizes, but
809+
// we do not know the subtarget in different functions in MC.
810+
//
811+
// TODO: Should probably print it anyway, maybe a more specific version.
812+
if (!IsWaveSizeOp) {
813+
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
814+
<< "\' register class*/";
815+
}
806816
}
807817
}
808818
} else if (Op.isImm()) {

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,6 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
def isWave32 : Predicate<"Subtarget->isWave32()">,
10-
AssemblerPredicate <(any_of FeatureWavefrontSize32,
11-
FeatureAssemblerPermissiveWavesize)>;
12-
def isWave64 : Predicate<"Subtarget->isWave64()">,
13-
AssemblerPredicate <(any_of FeatureWavefrontSize64,
14-
FeatureAssemblerPermissiveWavesize)>;
15-
169
class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
1710
: MnemonicAlias<From, To, VariantName>, PredicateControl;
1811

llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
7575
bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
7676
assert(Changed || ConstrainRegs.empty());
7777
for (Register Reg : ConstrainRegs)
78-
MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
78+
MRI->constrainRegClass(Reg, TII->getRegisterInfo().getWaveMaskRegClass());
7979
ConstrainRegs.clear();
8080

8181
return Changed;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3908,13 +3908,10 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
39083908
: &AMDGPU::VReg_64RegClass;
39093909
}
39103910

3911+
// FIXME: This should be deleted
39113912
const TargetRegisterClass *
39123913
SIRegisterInfo::getRegClass(unsigned RCID) const {
39133914
switch ((int)RCID) {
3914-
case AMDGPU::SReg_1RegClassID:
3915-
return getBoolRC();
3916-
case AMDGPU::SReg_1_XEXECRegClassID:
3917-
return getWaveMaskRegClass();
39183915
case -1:
39193916
return nullptr;
39203917
default:

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 71 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -896,20 +896,6 @@ def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v
896896
let Size = 64;
897897
}
898898

899-
def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
900-
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
901-
let CopyCost = 1;
902-
let isAllocatable = 0;
903-
let HasSGPR = 1;
904-
}
905-
906-
def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
907-
(add SReg_1_XEXEC, EXEC, EXEC_LO, EXEC_HI)> {
908-
let CopyCost = 1;
909-
let isAllocatable = 0;
910-
let HasSGPR = 1;
911-
}
912-
913899
multiclass SRegClass<int numRegs,
914900
list<ValueType> regTypes,
915901
SIRegisterTuples regList,
@@ -1205,79 +1191,140 @@ defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
12051191
defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
12061192
}
12071193

1194+
def SReg_1_XEXEC : SIRegisterClassLike<0, false, false, true>,
1195+
RegClassByHwMode<
1196+
[DefaultMode_Wave64,
1197+
AlignedVGPRNoAGPRMode_Wave64,
1198+
AVAlign2LoadStoreMode,
1199+
DefaultMode_Wave32,
1200+
AlignedVGPRNoAGPRMode_Wave32],
1201+
[SReg_64_XEXEC,
1202+
SReg_64_XEXEC,
1203+
SReg_64_XEXEC,
1204+
SReg_32_XM0_XEXEC, // FIXME: Why do the wave32 cases exclude m0?
1205+
SReg_32_XM0_XEXEC]
1206+
>;
1207+
1208+
def SReg_1 : SIRegisterClassLike<0, false, false, true>,
1209+
RegClassByHwMode<
1210+
[DefaultMode_Wave64,
1211+
AlignedVGPRNoAGPRMode_Wave64,
1212+
AVAlign2LoadStoreMode,
1213+
DefaultMode_Wave32,
1214+
AlignedVGPRNoAGPRMode_Wave32],
1215+
[SReg_64,
1216+
SReg_64,
1217+
SReg_64,
1218+
SReg_32,
1219+
SReg_32]
1220+
>;
1221+
12081222
//===----------------------------------------------------------------------===//
12091223
//
12101224
// AlignTarget classes. Artificial classes to swap between
12111225
// even-aligned and any-aligned classes depending on subtarget.
12121226
//
12131227
//===----------------------------------------------------------------------===//
12141228

1229+
// We have 3 orthogonal properties to consider. Unfortunately we need
1230+
// to define the cross product of these states, minus unused
1231+
// combinations.
1232+
12151233
def AV_LdSt_32_Target : RegClassByHwMode<
1216-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1217-
[VGPR_32, AV_32, VGPR_32]>, SIRegisterClassLike<32, true, true> {
1234+
[DefaultMode_Wave64,
1235+
DefaultMode_Wave32,
1236+
AVAlign2LoadStoreMode,
1237+
AlignedVGPRNoAGPRMode_Wave64,
1238+
AlignedVGPRNoAGPRMode_Wave32],
1239+
[VGPR_32,
1240+
VGPR_32,
1241+
AV_32,
1242+
VGPR_32,
1243+
VGPR_32]>,
1244+
SIRegisterClassLike<32, true, true> {
12181245
let DecoderMethod = "decodeAVLdSt";
12191246
}
12201247

12211248
foreach RegSize = [ 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512, 1024 ] in {
12221249
def VReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true>,
12231250
RegClassByHwMode<
1224-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1251+
[DefaultMode_Wave64,
1252+
DefaultMode_Wave32,
1253+
AVAlign2LoadStoreMode,
1254+
AlignedVGPRNoAGPRMode_Wave64,
1255+
AlignedVGPRNoAGPRMode_Wave32],
12251256
[!cast<RegisterClass>("VReg_"#RegSize),
1257+
!cast<RegisterClass>("VReg_"#RegSize),
1258+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12261259
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12271260
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12281261
let DecoderMethod = "DecodeVReg_"#RegSize#"RegisterClass";
12291262
}
12301263

12311264
def AReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, false, true>,
12321265
RegClassByHwMode<
1233-
[DefaultMode, AVAlign2LoadStoreMode, /*Unused combination*/],
1266+
[DefaultMode_Wave64, /*unused combination*/ AVAlign2LoadStoreMode, /*Unused combination*/ /*Unused combination*/],
12341267
[!cast<RegisterClass>("AReg_"#RegSize),
1268+
/*unused combination*/
12351269
!cast<RegisterClass>("AReg_"#RegSize#_Align2)
1270+
/*Unused combination*/
12361271
/*Unused combination*/]> {
12371272
let DecoderMethod = "DecodeAReg_"#RegSize#"RegisterClass";
12381273
}
12391274

12401275
def AV_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
12411276
RegClassByHwMode<
1242-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1277+
[DefaultMode_Wave32,
1278+
DefaultMode_Wave64,
1279+
AVAlign2LoadStoreMode,
1280+
AlignedVGPRNoAGPRMode_Wave64,
1281+
AlignedVGPRNoAGPRMode_Wave32],
12431282
[!cast<RegisterClass>("AV_"#RegSize),
1283+
!cast<RegisterClass>("AV_"#RegSize),
12441284
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1285+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12451286
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12461287
let DecoderMethod = "DecodeAV_"#RegSize#"RegisterClass";
12471288
}
12481289

12491290
def AV_LdSt_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
12501291
RegClassByHwMode<
1251-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1292+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12521293
[!cast<RegisterClass>("VReg_"#RegSize),
1294+
!cast<RegisterClass>("VReg_"#RegSize),
12531295
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1296+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12541297
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12551298
let DecoderMethod = "decodeAVLdSt";
12561299
}
12571300

12581301
def AV_LdSt_#RegSize#_Align2 : SIRegisterClassLike<RegSize, true, true>,
12591302
RegClassByHwMode<
1260-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1303+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12611304
[!cast<RegisterClass>("VReg_"#RegSize#_Align2),
1305+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12621306
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1307+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12631308
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12641309
let DecoderMethod = "decodeAVLdSt";
12651310
}
12661311

12671312
def AV_LdSt_#RegSize#_Align1 : SIRegisterClassLike<RegSize, true, true>,
12681313
RegClassByHwMode<
1269-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1314+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12701315
[!cast<RegisterClass>("VReg_"#RegSize),
1316+
!cast<RegisterClass>("VReg_"#RegSize),
12711317
!cast<RegisterClass>("AV_"#RegSize),
1318+
!cast<RegisterClass>("VReg_"#RegSize),
12721319
!cast<RegisterClass>("VReg_"#RegSize)]> {
12731320
let DecoderMethod = "decodeAVLdSt";
12741321
}
12751322
}
12761323

12771324
def VS_64_AlignTarget : SIRegisterClassLike<64, true, false, true>,
12781325
RegClassByHwMode<
1279-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1280-
[VS_64, VS_64_Align2, VS_64_Align2]> {
1326+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
1327+
[VS_64, VS_64, VS_64_Align2, VS_64_Align2, VS_64_Align2]> {
12811328
let DecoderMethod = "decodeSrcRegOrImm9";
12821329
}
12831330

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,18 +1233,12 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
12331233
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
12341234
// complains it cannot replace i1 <-> i64/i32 if node was not morphed in place.
12351235
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1)> {
1236-
let WaveSizePredicate = isWave64 in
12371236
def : GCNPat <
1238-
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1239-
(i64 (COPY_TO_REGCLASS dstInst, SReg_64))
1237+
(WaveSizeVT (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1238+
dstInst
12401239
>;
12411240

12421241
let WaveSizePredicate = isWave32 in {
1243-
def : GCNPat <
1244-
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1245-
(i32 (COPY_TO_REGCLASS dstInst, SReg_32))
1246-
>;
1247-
12481242
// Support codegen of i64 setcc in wave32 mode.
12491243
def : GCNPat <
12501244
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2208,12 +2208,12 @@ include "VOP3PInstructions.td"
22082208
include "VOPDInstructions.td"
22092209

22102210
class ClassPat<Instruction inst, ValueType vt> : GCNPat <
2211-
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
2211+
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
22122212
(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
22132213
>;
22142214

22152215
class ClassPat_t16<Instruction inst, ValueType vt> : GCNPat <
2216-
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
2216+
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
22172217
(inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
22182218
>;
22192219

0 commit comments

Comments
 (0)