Skip to content

Commit 1bb104e

Browse files
committed
AMDGPU: Start using RegClassByHwMode for wavesize operands
This eliminates the pseudo register classes used to hack the wave register class, which are now replaced with RegClassByHwMode, so most of the diff is from register class ID renumbering.
1 parent c1ac2e0 commit 1bb104e

36 files changed

+672
-603
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2956,15 +2956,46 @@ def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
29562956
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
29572957
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;
29582958

2959+
def NotNeedsAlignedVGPRs : Predicate<"!Subtarget->needsAlignedVGPRs()">,
2960+
AssemblerPredicate<(all_of (not FeatureRequiresAlignedVGPRs))>;
2961+
2962+
def isWave32 : Predicate<"Subtarget->isWave32()">,
2963+
AssemblerPredicate <(any_of FeatureWavefrontSize32,
2964+
FeatureAssemblerPermissiveWavesize)>;
2965+
def isWave64 : Predicate<"Subtarget->isWave64()">,
2966+
AssemblerPredicate <(any_of FeatureWavefrontSize64,
2967+
FeatureAssemblerPermissiveWavesize)>;
2968+
2969+
def isWave32Strict : Predicate<"Subtarget->isWave32()">,
2970+
AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
2971+
def isWave64Strict : Predicate<"Subtarget->isWave64()">,
2972+
AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
2973+
29592974
//===----------------------------------------------------------------------===//
29602975
// HwModes
29612976
//===----------------------------------------------------------------------===//
29622977

2963-
// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement
2978+
defvar DefaultMode_Wave64 = DefaultMode;
2979+
defvar DefaultMode_Wave32 = HwMode<[isWave32, NotNeedsAlignedVGPRs]>;
2980+
2981+
// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement. Implied
2982+
// wave64.
29642983
def AVAlign2LoadStoreMode : HwMode<[HasMAIInsts, NeedsAlignedVGPRs]>;
29652984

29662985
// gfx1250, has alignment requirement but no AGPRs.
2967-
def AlignedVGPRNoAGPRMode : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs]>;
2986+
def AlignedVGPRNoAGPRMode_Wave32 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave32Strict]>;
2987+
def AlignedVGPRNoAGPRMode_Wave64 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave64Strict]>;
2988+
2989+
// FIXME: This should be able to only define a separate hwmode that
2990+
// only depends on wavesize for just ValueTypes. These use different
2991+
// HwMode namespaces. If we don't define the full set of modes used
2992+
// for RegClassByHwMode, TableGen crashes for some reason.
2993+
def WaveSizeVT : ValueTypeByHwMode<[
2994+
DefaultMode_Wave64,
2995+
AVAlign2LoadStoreMode,
2996+
AlignedVGPRNoAGPRMode_Wave64,
2997+
DefaultMode_Wave32,
2998+
AlignedVGPRNoAGPRMode_Wave32], [i64, i64, i64, i32, i32]>;
29682999

29693000

29703001
// Include AMDGPU TD files

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,6 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
287287
const RegisterBank &
288288
AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
289289
LLT Ty) const {
290-
if (&RC == &AMDGPU::SReg_1RegClass)
291-
return AMDGPU::VCCRegBank;
292-
293290
// We promote real scalar booleans to SReg_32. Any SGPR using s1 is really a
294291
// VCC-like use.
295292
if (TRI->isSGPRClass(&RC)) {

llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
1515
>;
1616

1717
// It is helpful to distinguish conditions from ordinary SGPRs.
18-
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
18+
def VCCRegBank : RegisterBank<"VCC", [SReg_32, SReg_64]>;
1919

2020
def AGPRRegBank : RegisterBank <"AGPR",
2121
[AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_288, AReg_320, AReg_352, AReg_384, AReg_512, AReg_1024]

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -789,14 +789,24 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
789789
// Intention: print disassembler message when invalid code is decoded,
790790
// for example sgpr register used in VReg or VISrc(VReg or imm) operand.
791791
const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
792-
int16_t RCID = MII.getOpRegClassID(
793-
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
794-
if (RCID != -1) {
792+
if (OpInfo.RegClass != -1) {
793+
int16_t RCID = MII.getOpRegClassID(
794+
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
795795
const MCRegisterClass &RC = MRI.getRegClass(RCID);
796796
auto Reg = mc2PseudoReg(Op.getReg());
797797
if (!RC.contains(Reg) && !isInlineValue(Reg)) {
798-
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
799-
<< "\' register class*/";
798+
bool IsWaveSizeOp = OpInfo.isLookupRegClassByHwMode() &&
799+
(OpInfo.RegClass == AMDGPU::SReg_1 ||
800+
OpInfo.RegClass == AMDGPU::SReg_1_XEXEC);
801+
// Suppress this comment for a mismatched wavesize. Some users expect to
802+
// be able to assemble and disassemble modules with mixed wavesizes, but
803+
// we do not know the subtarget in different functions in MC.
804+
//
805+
// TODO: Should probably print it anyway, maybe a more specific version.
806+
if (!IsWaveSizeOp) {
807+
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
808+
<< "\' register class*/";
809+
}
800810
}
801811
}
802812
} else if (Op.isImm()) {

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,6 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
def isWave32 : Predicate<"Subtarget->isWave32()">,
10-
AssemblerPredicate <(any_of FeatureWavefrontSize32,
11-
FeatureAssemblerPermissiveWavesize)>;
12-
def isWave64 : Predicate<"Subtarget->isWave64()">,
13-
AssemblerPredicate <(any_of FeatureWavefrontSize64,
14-
FeatureAssemblerPermissiveWavesize)>;
15-
169
class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
1710
: MnemonicAlias<From, To, VariantName>, PredicateControl;
1811

llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
7575
bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
7676
assert(Changed || ConstrainRegs.empty());
7777
for (Register Reg : ConstrainRegs)
78-
MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
78+
MRI->constrainRegClass(Reg, TII->getRegisterInfo().getWaveMaskRegClass());
7979
ConstrainRegs.clear();
8080

8181
return Changed;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3893,13 +3893,10 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
38933893
: &AMDGPU::VReg_64RegClass;
38943894
}
38953895

3896+
// FIXME: This should be deleted
38963897
const TargetRegisterClass *
38973898
SIRegisterInfo::getRegClass(unsigned RCID) const {
38983899
switch ((int)RCID) {
3899-
case AMDGPU::SReg_1RegClassID:
3900-
return getBoolRC();
3901-
case AMDGPU::SReg_1_XEXECRegClassID:
3902-
return getWaveMaskRegClass();
39033900
case -1:
39043901
return nullptr;
39053902
default:

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 71 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -896,20 +896,6 @@ def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v
896896
let Size = 64;
897897
}
898898

899-
def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
900-
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
901-
let CopyCost = 1;
902-
let isAllocatable = 0;
903-
let HasSGPR = 1;
904-
}
905-
906-
def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
907-
(add SReg_1_XEXEC, EXEC, EXEC_LO, EXEC_HI)> {
908-
let CopyCost = 1;
909-
let isAllocatable = 0;
910-
let HasSGPR = 1;
911-
}
912-
913899
multiclass SRegClass<int numRegs,
914900
list<ValueType> regTypes,
915901
SIRegisterTuples regList,
@@ -1205,79 +1191,140 @@ defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
12051191
defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
12061192
}
12071193

1194+
def SReg_1_XEXEC : SIRegisterClassLike<0, false, false, true>,
1195+
RegClassByHwMode<
1196+
[DefaultMode_Wave64,
1197+
AlignedVGPRNoAGPRMode_Wave64,
1198+
AVAlign2LoadStoreMode,
1199+
DefaultMode_Wave32,
1200+
AlignedVGPRNoAGPRMode_Wave32],
1201+
[SReg_64_XEXEC,
1202+
SReg_64_XEXEC,
1203+
SReg_64_XEXEC,
1204+
SReg_32_XM0_XEXEC, // FIXME: Why do the wave32 cases exclude m0?
1205+
SReg_32_XM0_XEXEC]
1206+
>;
1207+
1208+
def SReg_1 : SIRegisterClassLike<0, false, false, true>,
1209+
RegClassByHwMode<
1210+
[DefaultMode_Wave64,
1211+
AlignedVGPRNoAGPRMode_Wave64,
1212+
AVAlign2LoadStoreMode,
1213+
DefaultMode_Wave32,
1214+
AlignedVGPRNoAGPRMode_Wave32],
1215+
[SReg_64,
1216+
SReg_64,
1217+
SReg_64,
1218+
SReg_32,
1219+
SReg_32]
1220+
>;
1221+
12081222
//===----------------------------------------------------------------------===//
12091223
//
12101224
// AlignTarget classes. Artificial classes to swap between
12111225
// even-aligned and any-aligned classes depending on subtarget.
12121226
//
12131227
//===----------------------------------------------------------------------===//
12141228

1229+
// We have 3 orthogonal properties to consider. Unfortunately we need
1230+
// to define the cross product of these states, minus unused
1231+
// combinations.
1232+
12151233
def AV_LdSt_32_Target : RegClassByHwMode<
1216-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1217-
[VGPR_32, AV_32, VGPR_32]>, SIRegisterClassLike<32, true, true> {
1234+
[DefaultMode_Wave64,
1235+
DefaultMode_Wave32,
1236+
AVAlign2LoadStoreMode,
1237+
AlignedVGPRNoAGPRMode_Wave64,
1238+
AlignedVGPRNoAGPRMode_Wave32],
1239+
[VGPR_32,
1240+
VGPR_32,
1241+
AV_32,
1242+
VGPR_32,
1243+
VGPR_32]>,
1244+
SIRegisterClassLike<32, true, true> {
12181245
let DecoderMethod = "decodeAVLdSt";
12191246
}
12201247

12211248
foreach RegSize = [ 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512, 1024 ] in {
12221249
def VReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true>,
12231250
RegClassByHwMode<
1224-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1251+
[DefaultMode_Wave64,
1252+
DefaultMode_Wave32,
1253+
AVAlign2LoadStoreMode,
1254+
AlignedVGPRNoAGPRMode_Wave64,
1255+
AlignedVGPRNoAGPRMode_Wave32],
12251256
[!cast<RegisterClass>("VReg_"#RegSize),
1257+
!cast<RegisterClass>("VReg_"#RegSize),
1258+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12261259
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12271260
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12281261
let DecoderMethod = "DecodeVReg_"#RegSize#"RegisterClass";
12291262
}
12301263

12311264
def AReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, false, true>,
12321265
RegClassByHwMode<
1233-
[DefaultMode, AVAlign2LoadStoreMode, /*Unused combination*/],
1266+
[DefaultMode_Wave64, /*unused combination*/ AVAlign2LoadStoreMode, /*Unused combination*/ /*Unused combination*/],
12341267
[!cast<RegisterClass>("AReg_"#RegSize),
1268+
/*unused combination*/
12351269
!cast<RegisterClass>("AReg_"#RegSize#_Align2)
1270+
/*Unused combination*/
12361271
/*Unused combination*/]> {
12371272
let DecoderMethod = "DecodeAReg_"#RegSize#"RegisterClass";
12381273
}
12391274

12401275
def AV_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
12411276
RegClassByHwMode<
1242-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1277+
[DefaultMode_Wave32,
1278+
DefaultMode_Wave64,
1279+
AVAlign2LoadStoreMode,
1280+
AlignedVGPRNoAGPRMode_Wave64,
1281+
AlignedVGPRNoAGPRMode_Wave32],
12431282
[!cast<RegisterClass>("AV_"#RegSize),
1283+
!cast<RegisterClass>("AV_"#RegSize),
12441284
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1285+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12451286
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12461287
let DecoderMethod = "DecodeAV_"#RegSize#"RegisterClass";
12471288
}
12481289

12491290
def AV_LdSt_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
12501291
RegClassByHwMode<
1251-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1292+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12521293
[!cast<RegisterClass>("VReg_"#RegSize),
1294+
!cast<RegisterClass>("VReg_"#RegSize),
12531295
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1296+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12541297
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12551298
let DecoderMethod = "decodeAVLdSt";
12561299
}
12571300

12581301
def AV_LdSt_#RegSize#_Align2 : SIRegisterClassLike<RegSize, true, true>,
12591302
RegClassByHwMode<
1260-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1303+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12611304
[!cast<RegisterClass>("VReg_"#RegSize#_Align2),
1305+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12621306
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1307+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12631308
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12641309
let DecoderMethod = "decodeAVLdSt";
12651310
}
12661311

12671312
def AV_LdSt_#RegSize#_Align1 : SIRegisterClassLike<RegSize, true, true>,
12681313
RegClassByHwMode<
1269-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1314+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12701315
[!cast<RegisterClass>("VReg_"#RegSize),
1316+
!cast<RegisterClass>("VReg_"#RegSize),
12711317
!cast<RegisterClass>("AV_"#RegSize),
1318+
!cast<RegisterClass>("VReg_"#RegSize),
12721319
!cast<RegisterClass>("VReg_"#RegSize)]> {
12731320
let DecoderMethod = "decodeAVLdSt";
12741321
}
12751322
}
12761323

12771324
def VS_64_AlignTarget : SIRegisterClassLike<64, true, false, true>,
12781325
RegClassByHwMode<
1279-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1280-
[VS_64, VS_64_Align2, VS_64_Align2]> {
1326+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
1327+
[VS_64, VS_64, VS_64_Align2, VS_64_Align2, VS_64_Align2]> {
12811328
let DecoderMethod = "decodeSrcRegOrImm9";
12821329
}
12831330

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,18 +1233,12 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
12331233
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
12341234
// complains that it cannot replace i1 <-> i64/i32 if the node was not morphed in place.
12351235
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1)> {
1236-
let WaveSizePredicate = isWave64 in
12371236
def : GCNPat <
1238-
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1239-
(i64 (COPY_TO_REGCLASS dstInst, SReg_64))
1237+
(WaveSizeVT (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1238+
dstInst
12401239
>;
12411240

12421241
let WaveSizePredicate = isWave32 in {
1243-
def : GCNPat <
1244-
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1245-
(i32 (COPY_TO_REGCLASS dstInst, SReg_32))
1246-
>;
1247-
12481242
// Support codegen of i64 setcc in wave32 mode.
12491243
def : GCNPat <
12501244
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2197,12 +2197,12 @@ include "VOP3PInstructions.td"
21972197
include "VOPDInstructions.td"
21982198

21992199
class ClassPat<Instruction inst, ValueType vt> : GCNPat <
2200-
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
2200+
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
22012201
(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
22022202
>;
22032203

22042204
class ClassPat_t16<Instruction inst, ValueType vt> : GCNPat <
2205-
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
2205+
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
22062206
(inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
22072207
>;
22082208

0 commit comments

Comments
 (0)