Skip to content

Commit 99aa6e5

Browse files
committed
AMDGPU: Start using RegClassByHwMode for wavesize operands
This eliminates the pseudo register classes used to hack the wave register class, which are now replaced with RegClassByHwMode. As a result, most of the diff is from register class ID renumbering.
1 parent 95be6c5 commit 99aa6e5

35 files changed

+492
-423
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2945,15 +2945,46 @@ def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
29452945
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
29462946
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;
29472947

2948+
def NotNeedsAlignedVGPRs : Predicate<"!Subtarget->needsAlignedVGPRs()">,
2949+
AssemblerPredicate<(all_of (not FeatureRequiresAlignedVGPRs))>;
2950+
2951+
def isWave32 : Predicate<"Subtarget->isWave32()">,
2952+
AssemblerPredicate <(any_of FeatureWavefrontSize32,
2953+
FeatureAssemblerPermissiveWavesize)>;
2954+
def isWave64 : Predicate<"Subtarget->isWave64()">,
2955+
AssemblerPredicate <(any_of FeatureWavefrontSize64,
2956+
FeatureAssemblerPermissiveWavesize)>;
2957+
2958+
def isWave32Strict : Predicate<"Subtarget->isWave32()">,
2959+
AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
2960+
def isWave64Strict : Predicate<"Subtarget->isWave64()">,
2961+
AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
2962+
29482963
//===----------------------------------------------------------------------===//
29492964
// HwModes
29502965
//===----------------------------------------------------------------------===//
29512966

2952-
// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement
2967+
defvar DefaultMode_Wave64 = DefaultMode;
2968+
defvar DefaultMode_Wave32 = HwMode<[isWave32, NotNeedsAlignedVGPRs]>;
2969+
2970+
// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement. Implied
2971+
// wave64.
29532972
def AVAlign2LoadStoreMode : HwMode<[HasMAIInsts, NeedsAlignedVGPRs]>;
29542973

29552974
// gfx1250, has alignment requirement but no AGPRs.
2956-
def AlignedVGPRNoAGPRMode : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs]>;
2975+
def AlignedVGPRNoAGPRMode_Wave32 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave32Strict]>;
2976+
def AlignedVGPRNoAGPRMode_Wave64 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave64Strict]>;
2977+
2978+
// FIXME: This should be able to only define a separate hwmode that
2979+
// only depends on wavesize for just ValueTypes. These use different
2980+
// HwMode namespaces. If we don't define the full set of modes used
2981+
// for RegClassByHwMode, tablegen crashes for some reason
2982+
def WaveSizeVT : ValueTypeByHwMode<[
2983+
DefaultMode_Wave64,
2984+
AVAlign2LoadStoreMode,
2985+
AlignedVGPRNoAGPRMode_Wave64,
2986+
DefaultMode_Wave32,
2987+
AlignedVGPRNoAGPRMode_Wave32], [i64, i64, i64, i32, i32]>;
29572988

29582989

29592990
// Include AMDGPU TD files

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,6 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
287287
const RegisterBank &
288288
AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
289289
LLT Ty) const {
290-
if (&RC == &AMDGPU::SReg_1RegClass)
291-
return AMDGPU::VCCRegBank;
292-
293290
// We promote real scalar booleans to SReg_32. Any SGPR using s1 is really a
294291
// VCC-like use.
295292
if (TRI->isSGPRClass(&RC)) {

llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
1515
>;
1616

1717
// It is helpful to distinguish conditions from ordinary SGPRs.
18-
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
18+
def VCCRegBank : RegisterBank<"VCC", [SReg_32, SReg_64]>;
1919

2020
def AGPRRegBank : RegisterBank <"AGPR",
2121
[AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_288, AReg_320, AReg_352, AReg_384, AReg_512, AReg_1024]

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -789,14 +789,24 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
789789
// Intention: print disassembler message when invalid code is decoded,
790790
// for example sgpr register used in VReg or VISrc(VReg or imm) operand.
791791
const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
792-
int16_t RCID = MII.getOpRegClassID(
793-
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
794-
if (RCID != -1) {
792+
if (OpInfo.RegClass != -1) {
793+
int16_t RCID = MII.getOpRegClassID(
794+
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
795795
const MCRegisterClass &RC = MRI.getRegClass(RCID);
796796
auto Reg = mc2PseudoReg(Op.getReg());
797797
if (!RC.contains(Reg) && !isInlineValue(Reg)) {
798-
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
799-
<< "\' register class*/";
798+
bool IsWaveSizeOp = OpInfo.isLookupRegClassByHwMode() &&
799+
(OpInfo.RegClass == AMDGPU::SReg_1 ||
800+
OpInfo.RegClass == AMDGPU::SReg_1_XEXEC);
801+
// Suppress this comment for a mismatched wavesize. Some users expect to
802+
// be able to assemble and disassemble modules with mixed wavesizes, but
803+
// we do not know the subtarget in different functions in MC.
804+
//
805+
// TODO: Should probably print it anyway, maybe a more specific version.
806+
if (!IsWaveSizeOp) {
807+
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
808+
<< "\' register class*/";
809+
}
800810
}
801811
}
802812
} else if (Op.isImm()) {

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,6 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
def isWave32 : Predicate<"Subtarget->isWave32()">,
10-
AssemblerPredicate <(any_of FeatureWavefrontSize32,
11-
FeatureAssemblerPermissiveWavesize)>;
12-
def isWave64 : Predicate<"Subtarget->isWave64()">,
13-
AssemblerPredicate <(any_of FeatureWavefrontSize64,
14-
FeatureAssemblerPermissiveWavesize)>;
15-
169
class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
1710
: MnemonicAlias<From, To, VariantName>, PredicateControl;
1811

llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
7575
bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
7676
assert(Changed || ConstrainRegs.empty());
7777
for (Register Reg : ConstrainRegs)
78-
MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
78+
MRI->constrainRegClass(Reg, TII->getRegisterInfo().getWaveMaskRegClass());
7979
ConstrainRegs.clear();
8080

8181
return Changed;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3897,13 +3897,10 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
38973897
: &AMDGPU::VReg_64RegClass;
38983898
}
38993899

3900+
// FIXME: This should be deleted
39003901
const TargetRegisterClass *
39013902
SIRegisterInfo::getRegClass(unsigned RCID) const {
39023903
switch ((int)RCID) {
3903-
case AMDGPU::SReg_1RegClassID:
3904-
return getBoolRC();
3905-
case AMDGPU::SReg_1_XEXECRegClassID:
3906-
return getWaveMaskRegClass();
39073904
case -1:
39083905
return nullptr;
39093906
default:

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 71 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -915,20 +915,6 @@ def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v
915915
let Size = 64;
916916
}
917917

918-
def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
919-
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
920-
let CopyCost = 1;
921-
let isAllocatable = 0;
922-
let HasSGPR = 1;
923-
}
924-
925-
def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
926-
(add SReg_1_XEXEC, EXEC, EXEC_LO, EXEC_HI)> {
927-
let CopyCost = 1;
928-
let isAllocatable = 0;
929-
let HasSGPR = 1;
930-
}
931-
932918
multiclass SRegClass<int numRegs,
933919
list<ValueType> regTypes,
934920
SIRegisterTuples regList,
@@ -1208,79 +1194,140 @@ defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
12081194
defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
12091195
}
12101196

1197+
def SReg_1_XEXEC : SIRegisterClassLike<0, false, false, true>,
1198+
RegClassByHwMode<
1199+
[DefaultMode_Wave64,
1200+
AlignedVGPRNoAGPRMode_Wave64,
1201+
AVAlign2LoadStoreMode,
1202+
DefaultMode_Wave32,
1203+
AlignedVGPRNoAGPRMode_Wave32],
1204+
[SReg_64_XEXEC,
1205+
SReg_64_XEXEC,
1206+
SReg_64_XEXEC,
1207+
SReg_32_XM0_XEXEC, // FIXME: Why do the wave32 cases exclude m0?
1208+
SReg_32_XM0_XEXEC]
1209+
>;
1210+
1211+
def SReg_1 : SIRegisterClassLike<0, false, false, true>,
1212+
RegClassByHwMode<
1213+
[DefaultMode_Wave64,
1214+
AlignedVGPRNoAGPRMode_Wave64,
1215+
AVAlign2LoadStoreMode,
1216+
DefaultMode_Wave32,
1217+
AlignedVGPRNoAGPRMode_Wave32],
1218+
[SReg_64,
1219+
SReg_64,
1220+
SReg_64,
1221+
SReg_32,
1222+
SReg_32]
1223+
>;
1224+
12111225
//===----------------------------------------------------------------------===//
12121226
//
12131227
// AlignTarget classes. Artificial classes to swap between
12141228
// even-aligned and any-aligned classes depending on subtarget.
12151229
//
12161230
//===----------------------------------------------------------------------===//
12171231

1232+
// We have 3 orthogonal properties to consider. Unfortunately we need
1233+
// to define the cross product of these states, minus unused
1234+
// combinations.
1235+
12181236
def AV_LdSt_32_Target : RegClassByHwMode<
1219-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1220-
[VGPR_32, AV_32, VGPR_32]>, SIRegisterClassLike<32, true, true> {
1237+
[DefaultMode_Wave64,
1238+
DefaultMode_Wave32,
1239+
AVAlign2LoadStoreMode,
1240+
AlignedVGPRNoAGPRMode_Wave64,
1241+
AlignedVGPRNoAGPRMode_Wave32],
1242+
[VGPR_32,
1243+
VGPR_32,
1244+
AV_32,
1245+
VGPR_32,
1246+
VGPR_32]>,
1247+
SIRegisterClassLike<32, true, true> {
12211248
let DecoderMethod = "decodeAVLdSt";
12221249
}
12231250

12241251
foreach RegSize = [ 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512, 1024 ] in {
12251252
def VReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true>,
12261253
RegClassByHwMode<
1227-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1254+
[DefaultMode_Wave64,
1255+
DefaultMode_Wave32,
1256+
AVAlign2LoadStoreMode,
1257+
AlignedVGPRNoAGPRMode_Wave64,
1258+
AlignedVGPRNoAGPRMode_Wave32],
12281259
[!cast<RegisterClass>("VReg_"#RegSize),
1260+
!cast<RegisterClass>("VReg_"#RegSize),
1261+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12291262
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12301263
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12311264
let DecoderMethod = "DecodeVReg_"#RegSize#"RegisterClass";
12321265
}
12331266

12341267
def AReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, false, true>,
12351268
RegClassByHwMode<
1236-
[DefaultMode, AVAlign2LoadStoreMode, /*Unused combination*/],
1269+
[DefaultMode_Wave64, /*unused combination*/ AVAlign2LoadStoreMode, /*Unused combination*/ /*Unused combination*/],
12371270
[!cast<RegisterClass>("AReg_"#RegSize),
1271+
/*unused combination*/
12381272
!cast<RegisterClass>("AReg_"#RegSize#_Align2)
1273+
/*Unused combination*/
12391274
/*Unused combination*/]> {
12401275
let DecoderMethod = "DecodeAReg_"#RegSize#"RegisterClass";
12411276
}
12421277

12431278
def AV_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
12441279
RegClassByHwMode<
1245-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1280+
[DefaultMode_Wave32,
1281+
DefaultMode_Wave64,
1282+
AVAlign2LoadStoreMode,
1283+
AlignedVGPRNoAGPRMode_Wave64,
1284+
AlignedVGPRNoAGPRMode_Wave32],
12461285
[!cast<RegisterClass>("AV_"#RegSize),
1286+
!cast<RegisterClass>("AV_"#RegSize),
12471287
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1288+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12481289
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12491290
let DecoderMethod = "DecodeAV_"#RegSize#"RegisterClass";
12501291
}
12511292

12521293
def AV_LdSt_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
12531294
RegClassByHwMode<
1254-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1295+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12551296
[!cast<RegisterClass>("VReg_"#RegSize),
1297+
!cast<RegisterClass>("VReg_"#RegSize),
12561298
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1299+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12571300
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12581301
let DecoderMethod = "decodeAVLdSt";
12591302
}
12601303

12611304
def AV_LdSt_#RegSize#_Align2 : SIRegisterClassLike<RegSize, true, true>,
12621305
RegClassByHwMode<
1263-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1306+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12641307
[!cast<RegisterClass>("VReg_"#RegSize#_Align2),
1308+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12651309
!cast<RegisterClass>("AV_"#RegSize#_Align2),
1310+
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
12661311
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
12671312
let DecoderMethod = "decodeAVLdSt";
12681313
}
12691314

12701315
def AV_LdSt_#RegSize#_Align1 : SIRegisterClassLike<RegSize, true, true>,
12711316
RegClassByHwMode<
1272-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1317+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
12731318
[!cast<RegisterClass>("VReg_"#RegSize),
1319+
!cast<RegisterClass>("VReg_"#RegSize),
12741320
!cast<RegisterClass>("AV_"#RegSize),
1321+
!cast<RegisterClass>("VReg_"#RegSize),
12751322
!cast<RegisterClass>("VReg_"#RegSize)]> {
12761323
let DecoderMethod = "decodeAVLdSt";
12771324
}
12781325
}
12791326

12801327
def VS_64_AlignTarget : SIRegisterClassLike<64, true, false, true>,
12811328
RegClassByHwMode<
1282-
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
1283-
[VS_64, VS_64_Align2, VS_64_Align2]> {
1329+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
1330+
[VS_64, VS_64, VS_64_Align2, VS_64_Align2, VS_64_Align2]> {
12841331
let DecoderMethod = "decodeSrcRegOrImm9";
12851332
}
12861333

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,18 +1233,12 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
12331233
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
12341234
// complains it cannot replace i1 <-> i64/i32 if node was not morphed in place.
12351235
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1)> {
1236-
let WaveSizePredicate = isWave64 in
12371236
def : GCNPat <
1238-
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1239-
(i64 (COPY_TO_REGCLASS dstInst, SReg_64))
1237+
(WaveSizeVT (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1238+
dstInst
12401239
>;
12411240

12421241
let WaveSizePredicate = isWave32 in {
1243-
def : GCNPat <
1244-
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1245-
(i32 (COPY_TO_REGCLASS dstInst, SReg_32))
1246-
>;
1247-
12481242
// Support codegen of i64 setcc in wave32 mode.
12491243
def : GCNPat <
12501244
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2197,12 +2197,12 @@ include "VOP3PInstructions.td"
21972197
include "VOPDInstructions.td"
21982198

21992199
class ClassPat<Instruction inst, ValueType vt> : GCNPat <
2200-
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
2200+
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
22012201
(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
22022202
>;
22032203

22042204
class ClassPat_t16<Instruction inst, ValueType vt> : GCNPat <
2205-
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
2205+
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
22062206
(inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
22072207
>;
22082208

0 commit comments

Comments
 (0)