Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2952,15 +2952,46 @@ def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
AssemblerPredicate<(all_of FeatureRequiresAlignedVGPRs)>;

def NotNeedsAlignedVGPRs : Predicate<"!Subtarget->needsAlignedVGPRs()">,
AssemblerPredicate<(all_of (not FeatureRequiresAlignedVGPRs))>;

// Wave-size predicates. On the assembler side each one is satisfied by either
// the matching wavefront-size feature or FeatureAssemblerPermissiveWavesize.
def isWave32 : Predicate<"Subtarget->isWave32()">,
AssemblerPredicate <(any_of FeatureWavefrontSize32,
FeatureAssemblerPermissiveWavesize)>;
def isWave64 : Predicate<"Subtarget->isWave64()">,
AssemblerPredicate <(any_of FeatureWavefrontSize64,
FeatureAssemblerPermissiveWavesize)>;

// Strict wave-size predicates. Same codegen check as isWave32/isWave64, but
// the assembler predicate requires the matching wavefront-size feature itself;
// FeatureAssemblerPermissiveWavesize is not accepted as an alternative.
def isWave32Strict : Predicate<"Subtarget->isWave32()">,
AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64Strict : Predicate<"Subtarget->isWave64()">,
AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

//===----------------------------------------------------------------------===//
// HwModes
//===----------------------------------------------------------------------===//

// Default modes split by wavesize. The wave64 case reuses DefaultMode as-is;
// the wave32 case is a distinct HwMode predicated on isWave32 and
// NotNeedsAlignedVGPRs.
defvar DefaultMode_Wave64 = DefaultMode;
defvar DefaultMode_Wave32 = HwMode<[isWave32, NotNeedsAlignedVGPRs]>;

// gfx90a-gfx950. Has AGPRs, and also the align2 VGPR/AGPR requirement. Implied
// wave64.
def AVAlign2LoadStoreMode : HwMode<[HasMAIInsts, NeedsAlignedVGPRs]>;

// gfx1250, has alignment requirement but no AGPRs. The _Wave32/_Wave64
// variants add the corresponding strict wavesize predicate on top of the
// same two conditions (NotHasMAIInsts and NeedsAlignedVGPRs).
def AlignedVGPRNoAGPRMode : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs]>;
def AlignedVGPRNoAGPRMode_Wave32 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave32Strict]>;
def AlignedVGPRNoAGPRMode_Wave64 : HwMode<[NotHasMAIInsts, NeedsAlignedVGPRs, isWave64Strict]>;

// Value type selected per HwMode: i64 for DefaultMode_Wave64,
// AVAlign2LoadStoreMode and AlignedVGPRNoAGPRMode_Wave64; i32 for
// DefaultMode_Wave32 and AlignedVGPRNoAGPRMode_Wave32.
//
// FIXME: This should be able to only define a separate hwmode that
// only depends on wavesize for just ValueTypes. These use different
// HwMode namespaces. If we don't define the full set of modes used
// for RegClassByHwMode, tablegen crashes for some reason.
def WaveSizeVT : ValueTypeByHwMode<[
DefaultMode_Wave64,
AVAlign2LoadStoreMode,
AlignedVGPRNoAGPRMode_Wave64,
DefaultMode_Wave32,
AlignedVGPRNoAGPRMode_Wave32], [i64, i64, i64, i32, i32]>;


// Include AMDGPU TD files
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,6 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
const RegisterBank &
AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
LLT Ty) const {
if (&RC == &AMDGPU::SReg_1RegClass)
return AMDGPU::VCCRegBank;

// We promote real scalar booleans to SReg_32. Any SGPR using s1 is really a
// VCC-like use.
if (TRI->isSGPRClass(&RC)) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
>;

// It is helpful to distinguish conditions from ordinary SGPRs.
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
def VCCRegBank : RegisterBank<"VCC", [SReg_32, SReg_64]>;

def AGPRRegBank : RegisterBank <"AGPR",
[AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_224, AReg_256, AReg_288, AReg_320, AReg_352, AReg_384, AReg_512, AReg_1024]
Expand Down
20 changes: 15 additions & 5 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -789,14 +789,24 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
// Intention: print disassembler message when invalid code is decoded,
// for example sgpr register used in VReg or VISrc(VReg or imm) operand.
const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
int16_t RCID = MII.getOpRegClassID(
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
if (RCID != -1) {
if (OpInfo.RegClass != -1) {
int16_t RCID = MII.getOpRegClassID(
OpInfo, STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
const MCRegisterClass &RC = MRI.getRegClass(RCID);
auto Reg = mc2PseudoReg(Op.getReg());
if (!RC.contains(Reg) && !isInlineValue(Reg)) {
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
<< "\' register class*/";
bool IsWaveSizeOp = OpInfo.isLookupRegClassByHwMode() &&
(OpInfo.RegClass == AMDGPU::SReg_1 ||
OpInfo.RegClass == AMDGPU::SReg_1_XEXEC);
// Suppress this comment for a mismatched wavesize. Some users expect to
// be able to assemble and disassemble modules with mixed wavesizes, but
// we do not know the subtarget in different functions in MC.
//
// TODO: Should probably print it anyway, maybe a more specific version.
if (!IsWaveSizeOp) {
O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
<< "\' register class*/";
}
}
}
} else if (Op.isImm()) {
Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,6 @@
//
//===----------------------------------------------------------------------===//

def isWave32 : Predicate<"Subtarget->isWave32()">,
AssemblerPredicate <(any_of FeatureWavefrontSize32,
FeatureAssemblerPermissiveWavesize)>;
def isWave64 : Predicate<"Subtarget->isWave64()">,
AssemblerPredicate <(any_of FeatureWavefrontSize64,
FeatureAssemblerPermissiveWavesize)>;

class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
: MnemonicAlias<From, To, VariantName>, PredicateControl;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
assert(Changed || ConstrainRegs.empty());
for (Register Reg : ConstrainRegs)
MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
MRI->constrainRegClass(Reg, TII->getRegisterInfo().getWaveMaskRegClass());
ConstrainRegs.clear();

return Changed;
Expand Down
5 changes: 1 addition & 4 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3888,13 +3888,10 @@ const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
: &AMDGPU::VReg_64RegClass;
}

// FIXME: This should be deleted
const TargetRegisterClass *
SIRegisterInfo::getRegClass(unsigned RCID) const {
switch ((int)RCID) {
case AMDGPU::SReg_1RegClassID:
return getBoolRC();
case AMDGPU::SReg_1_XEXECRegClassID:
return getWaveMaskRegClass();
case -1:
return nullptr;
default:
Expand Down
95 changes: 71 additions & 24 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -896,20 +896,6 @@ def SReg_64_Encodable : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v
let Size = 64;
}

def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_64_XEXEC, SReg_32_XEXEC)> {
let CopyCost = 1;
let isAllocatable = 0;
let HasSGPR = 1;
}

def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_1_XEXEC, EXEC, EXEC_LO, EXEC_HI)> {
let CopyCost = 1;
let isAllocatable = 0;
let HasSGPR = 1;
}

multiclass SRegClass<int numRegs,
list<ValueType> regTypes,
SIRegisterTuples regList,
Expand Down Expand Up @@ -1205,79 +1191,140 @@ defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
}

// Register class chosen by HwMode: SReg_64_XEXEC for DefaultMode_Wave64,
// AlignedVGPRNoAGPRMode_Wave64 and AVAlign2LoadStoreMode; SReg_32_XM0_XEXEC
// for DefaultMode_Wave32 and AlignedVGPRNoAGPRMode_Wave32. The two lists are
// matched positionally, so they must stay in the same order.
def SReg_1_XEXEC : SIRegisterClassLike<0, false, false, true>,
RegClassByHwMode<
[DefaultMode_Wave64,
AlignedVGPRNoAGPRMode_Wave64,
AVAlign2LoadStoreMode,
DefaultMode_Wave32,
AlignedVGPRNoAGPRMode_Wave32],
[SReg_64_XEXEC,
SReg_64_XEXEC,
SReg_64_XEXEC,
SReg_32_XM0_XEXEC, // FIXME: Why do the wave32 cases exclude m0?
SReg_32_XM0_XEXEC]
>;

// Register class chosen by HwMode: SReg_64 for DefaultMode_Wave64,
// AlignedVGPRNoAGPRMode_Wave64 and AVAlign2LoadStoreMode; SReg_32 for
// DefaultMode_Wave32 and AlignedVGPRNoAGPRMode_Wave32. The two lists are
// matched positionally, so they must stay in the same order.
def SReg_1 : SIRegisterClassLike<0, false, false, true>,
RegClassByHwMode<
[DefaultMode_Wave64,
AlignedVGPRNoAGPRMode_Wave64,
AVAlign2LoadStoreMode,
DefaultMode_Wave32,
AlignedVGPRNoAGPRMode_Wave32],
[SReg_64,
SReg_64,
SReg_64,
SReg_32,
SReg_32]
>;

//===----------------------------------------------------------------------===//
//
// AlignTarget classes. Artificial classes to swap between
// even-aligned and any-aligned classes depending on subtarget.
//
//===----------------------------------------------------------------------===//

// We have 3 orthogonal properties to consider. Unfortunately we need
// to define the cross product of these states, minus unused
// combinations.

def AV_LdSt_32_Target : RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
[VGPR_32, AV_32, VGPR_32]>, SIRegisterClassLike<32, true, true> {
[DefaultMode_Wave64,
DefaultMode_Wave32,
AVAlign2LoadStoreMode,
AlignedVGPRNoAGPRMode_Wave64,
AlignedVGPRNoAGPRMode_Wave32],
[VGPR_32,
VGPR_32,
AV_32,
VGPR_32,
VGPR_32]>,
SIRegisterClassLike<32, true, true> {
let DecoderMethod = "decodeAVLdSt";
}

foreach RegSize = [ 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512, 1024 ] in {
def VReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true>,
RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
[DefaultMode_Wave64,
DefaultMode_Wave32,
AVAlign2LoadStoreMode,
AlignedVGPRNoAGPRMode_Wave64,
AlignedVGPRNoAGPRMode_Wave32],
[!cast<RegisterClass>("VReg_"#RegSize),
!cast<RegisterClass>("VReg_"#RegSize),
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
let DecoderMethod = "DecodeVReg_"#RegSize#"RegisterClass";
}

def AReg_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, false, true>,
RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, /*Unused combination*/],
[DefaultMode_Wave64, /*unused combination*/ AVAlign2LoadStoreMode, /*Unused combination*/ /*Unused combination*/],
[!cast<RegisterClass>("AReg_"#RegSize),
/*unused combination*/
!cast<RegisterClass>("AReg_"#RegSize#_Align2)
/*Unused combination*/
/*Unused combination*/]> {
let DecoderMethod = "DecodeAReg_"#RegSize#"RegisterClass";
}

def AV_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
[DefaultMode_Wave32,
DefaultMode_Wave64,
AVAlign2LoadStoreMode,
AlignedVGPRNoAGPRMode_Wave64,
AlignedVGPRNoAGPRMode_Wave32],
[!cast<RegisterClass>("AV_"#RegSize),
!cast<RegisterClass>("AV_"#RegSize),
!cast<RegisterClass>("AV_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
let DecoderMethod = "DecodeAV_"#RegSize#"RegisterClass";
}

def AV_LdSt_#RegSize#_AlignTarget : SIRegisterClassLike<RegSize, true, true>,
RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
[!cast<RegisterClass>("VReg_"#RegSize),
!cast<RegisterClass>("VReg_"#RegSize),
!cast<RegisterClass>("AV_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
let DecoderMethod = "decodeAVLdSt";
}

def AV_LdSt_#RegSize#_Align2 : SIRegisterClassLike<RegSize, true, true>,
RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
[!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("AV_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2),
!cast<RegisterClass>("VReg_"#RegSize#_Align2)]> {
let DecoderMethod = "decodeAVLdSt";
}

def AV_LdSt_#RegSize#_Align1 : SIRegisterClassLike<RegSize, true, true>,
RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
[!cast<RegisterClass>("VReg_"#RegSize),
!cast<RegisterClass>("VReg_"#RegSize),
!cast<RegisterClass>("AV_"#RegSize),
!cast<RegisterClass>("VReg_"#RegSize),
!cast<RegisterClass>("VReg_"#RegSize)]> {
let DecoderMethod = "decodeAVLdSt";
}
}

def VS_64_AlignTarget : SIRegisterClassLike<64, true, false, true>,
RegClassByHwMode<
[DefaultMode, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode],
[VS_64, VS_64_Align2, VS_64_Align2]> {
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
[VS_64, VS_64, VS_64_Align2, VS_64_Align2, VS_64_Align2]> {
let DecoderMethod = "decodeSrcRegOrImm9";
}

Expand Down
10 changes: 2 additions & 8 deletions llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1233,18 +1233,12 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
// complains it cannot replace i1 <-> i64/i32 if node was not morphed in place.
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1)> {
let WaveSizePredicate = isWave64 in
def : GCNPat <
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
(i64 (COPY_TO_REGCLASS dstInst, SReg_64))
(WaveSizeVT (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
dstInst
>;

let WaveSizePredicate = isWave32 in {
def : GCNPat <
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
(i32 (COPY_TO_REGCLASS dstInst, SReg_32))
>;

// Support codegen of i64 setcc in wave32 mode.
def : GCNPat <
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2204,12 +2204,12 @@ include "VOP3PInstructions.td"
include "VOPDInstructions.td"

class ClassPat<Instruction inst, ValueType vt> : GCNPat <
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
>;

class ClassPat_t16<Instruction inst, ValueType vt> : GCNPat <
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
(i1 (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask))),
(inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
>;

Expand Down
Loading