Skip to content

Commit 2ee12f1

Browse files
authored
AMDGPU: Use RegClassByHwMode to manage GWS operand special case (#169373)
On targets that require even-aligned 64-bit VGPRs, GWS operands also require even alignment of their 32-bit data operand. Previously, a hacky post-processing step added an implicit operand to try to enforce the constraint, which required special-casing in other passes to avoid breaking the operand constraint. This change moves the handling into the instruction definition, so other passes no longer need to consider this edge case. MC still needs to special-case the operand in order to print/parse it as a 32-bit register. This still ends up being less work overall than introducing even-aligned 32-bit register classes. The same approach should also be applied to the image special case.
1 parent 20ca85b commit 2ee12f1

File tree

13 files changed

+301
-264
lines changed

13 files changed

+301
-264
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3080,9 +3080,38 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
30803080
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
30813081

30823082
const unsigned Opc = gwsIntrinToOpcode(IntrID);
3083+
3084+
const MCInstrDesc &InstrDesc = TII->get(Opc);
3085+
int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
3086+
3087+
const TargetRegisterClass *DataRC = TII->getRegClass(InstrDesc, Data0Idx);
3088+
30833089
SmallVector<SDValue, 5> Ops;
3084-
if (HasVSrc)
3085-
Ops.push_back(N->getOperand(2));
3090+
if (HasVSrc) {
3091+
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
3092+
3093+
SDValue Data = N->getOperand(2);
3094+
MVT DataVT = Data.getValueType().getSimpleVT();
3095+
if (TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3096+
// Normal 32-bit case.
3097+
Ops.push_back(N->getOperand(2));
3098+
} else {
3099+
// Operand is really 32-bits, but requires 64-bit alignment, so use the
3100+
// even aligned 64-bit register class.
3101+
const SDValue RegSeqOps[] = {
3102+
CurDAG->getTargetConstant(DataRC->getID(), SL, MVT::i32), Data,
3103+
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3104+
SDValue(
3105+
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3106+
0),
3107+
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3108+
3109+
Ops.push_back(SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
3110+
SL, MVT::v2i32, RegSeqOps),
3111+
0));
3112+
}
3113+
}
3114+
30863115
Ops.push_back(OffsetField);
30873116
Ops.push_back(Chain);
30883117

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,20 +1946,52 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
19461946
// The resource id offset is computed as (<isa opaque base> + M0[21:16] +
19471947
// offset field) % 64. Some versions of the programming guide omit the m0
19481948
// part, or claim it's from offset 0.
1949-
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID)));
1949+
1950+
unsigned Opc = gwsIntrinToOpcode(IID);
1951+
const MCInstrDesc &InstrDesc = TII.get(Opc);
19501952

19511953
if (HasVSrc) {
19521954
Register VSrc = MI.getOperand(1).getReg();
1953-
MIB.addReg(VSrc);
19541955

1955-
if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
1956-
return false;
1957-
}
1956+
int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
1957+
const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
1958+
const TargetRegisterClass *SubRC =
1959+
TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);
1960+
1961+
if (!SubRC) {
1962+
// 32-bit normal case.
1963+
if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
1964+
return false;
19581965

1959-
MIB.addImm(ImmOffset)
1960-
.cloneMemRefs(MI);
1966+
BuildMI(*MBB, &MI, DL, InstrDesc)
1967+
.addReg(VSrc)
1968+
.addImm(ImmOffset)
1969+
.cloneMemRefs(MI);
1970+
} else {
1971+
// Requires even register alignment, so create 64-bit value and pad the
1972+
// top half with undef.
1973+
Register DataReg = MRI->createVirtualRegister(DataRC);
1974+
if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
1975+
return false;
19611976

1962-
TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);
1977+
Register UndefReg = MRI->createVirtualRegister(SubRC);
1978+
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
1979+
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), DataReg)
1980+
.addReg(VSrc)
1981+
.addImm(AMDGPU::sub0)
1982+
.addReg(UndefReg)
1983+
.addImm(AMDGPU::sub1);
1984+
1985+
BuildMI(*MBB, &MI, DL, InstrDesc)
1986+
.addReg(DataReg)
1987+
.addImm(ImmOffset)
1988+
.cloneMemRefs(MI);
1989+
}
1990+
} else {
1991+
BuildMI(*MBB, &MI, DL, InstrDesc)
1992+
.addImm(ImmOffset)
1993+
.cloneMemRefs(MI);
1994+
}
19631995

19641996
MI.eraseFromParent();
19651997
return true;

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
347347
return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348348
}
349349

350+
// Parser predicate for the AV_LdSt_32_Align2_RegOp operand class: returns
// true when isRegClass() reports this operand as a member of either the
// VGPR_32 or the AGPR_32 register class, i.e. it is written as a plain 32-bit
// register in assembly.
bool isAV_LdSt_32_Align2_RegOp() const {
351+
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
352+
isRegClass(AMDGPU::AGPR_32RegClassID);
353+
}
354+
350355
bool isVRegWithInputMods() const;
351356
template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
352357
template <bool IsFake16> bool isT16VRegWithInputMods() const;

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ class DS_GWS_0D <string opName>
463463

464464
class DS_GWS_1D <string opName>
465465
: DS_GWS<opName,
466-
(ins AVLdSt_32:$data0, Offset:$offset),
466+
(ins AV_LdSt_32_Align2_RegOp:$data0, Offset:$offset),
467467
" $data0$offset gds"> {
468468

469469
let has_gws_data0 = 1;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,18 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
491491
printRegularOperand(MI, OpNo, STI, O);
492492
}
493493

494+
// Prints the register operand at index OpNo of MI to O. If the register has a
// sub0 subregister, that subregister is printed in place of the full
// register; otherwise the register is printed unchanged. STI is not used by
// this function.
void AMDGPUInstPrinter::printAVLdSt32Align2RegOp(const MCInst *MI,
495+
unsigned OpNo,
496+
const MCSubtargetInfo &STI,
497+
raw_ostream &O) {
498+
MCRegister Reg = MI->getOperand(OpNo).getReg();
499+
500+
// On targets with an even alignment requirement the operand is a wider
// register with a sub0 subregister; print only sub0 so the operand still
// reads as a 32-bit register. A register without a sub0 subregister yields
// no SubReg here and is printed as-is.
501+
if (MCRegister SubReg = MRI.getSubReg(Reg, AMDGPU::sub0))
502+
Reg = SubReg;
503+
printRegOperand(Reg, O, MRI);
504+
}
505+
494506
void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
495507
const MCSubtargetInfo &STI,
496508
raw_ostream &O) {

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ class AMDGPUInstPrinter : public MCInstPrinter {
7777
raw_ostream &O);
7878
void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
7979
raw_ostream &O);
80+
// Prints register operand OpNo of MI, substituting its sub0 subregister when
// the register has one.
void printAVLdSt32Align2RegOp(const MCInst *MI, unsigned OpNo,
81+
const MCSubtargetInfo &STI, raw_ostream &O);
82+
8083
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
8184
raw_ostream &O);
8285
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6429,8 +6429,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
64296429
case AMDGPU::DS_GWS_INIT:
64306430
case AMDGPU::DS_GWS_SEMA_BR:
64316431
case AMDGPU::DS_GWS_BARRIER:
6432-
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0);
6433-
[[fallthrough]];
64346432
case AMDGPU::DS_GWS_SEMA_V:
64356433
case AMDGPU::DS_GWS_SEMA_P:
64366434
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,6 +1657,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
16571657

16581658
const TargetSchedModel &getSchedModel() const { return SchedModel; }
16591659

1660+
// FIXME: This should be removed
16601661
// Enforce operand's \p OpName even alignment if required by target.
16611662
// This is used if an operand is a 32 bit register but needs to be aligned
16621663
// regardless.

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,6 +1328,17 @@ def VS_64_AlignTarget : SIRegisterClassLike<64, true, false, true>,
13281328
let DecoderMethod = "decodeSrcRegOrImm9";
13291329
}
13301330

1331+
1332+
// Special case for DS_GWS instructions. The register input is really
1333+
// 32-bit, but it needs to be even aligned on targets with a VGPR
1334+
// alignment requirement.
1335+
// Per-HwMode register class selection: the two default modes map to plain
// VGPR_32, while the remaining modes map to 64-bit Align2 classes
// (AV_64_Align2 or VReg_64_Align2) even though the declared bit width is 32.
def AV_LdSt_32_Align2 : SIRegisterClassLike</*Bitwidth=*/32, /*VGPR=*/true, /*AGPR=*/true>,
1336+
RegClassByHwMode<
1337+
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
1338+
[VGPR_32, VGPR_32, AV_64_Align2, VReg_64_Align2, VReg_64_Align2]> {
1339+
let DecoderMethod = "decodeAVLdSt<32>";
1340+
}
1341+
13311342
class RegImmMatcher<string name> : AsmOperandClass {
13321343
let Name = name;
13331344
let RenderMethod = "addRegOrImmOperands";
@@ -1580,6 +1591,17 @@ foreach size = ["64", "96", "128", "160", "256", "1024" ] in {
15801591
def AVLdSt_#size#_Align2 : AVLdStOperand<!cast<RegisterClassLike>("AV_LdSt_"#size#_Align2)>;
15811592
}
15821593

1594+
// Assembly parser match class named "AV_LdSt_32_Align2_RegOp"; matched
// operands are rendered with addRegOperands.
// NOTE(review): presumably paired with the isAV_LdSt_32_Align2_RegOp()
// predicate in AMDGPUAsmParser.cpp via the Name field - confirm.
def AV_LdSt_32_Align2_RegMatcher : AsmOperandClass {
1595+
let Name = "AV_LdSt_32_Align2_RegOp";
1596+
let RenderMethod = "addRegOperands";
1597+
}
1598+
1599+
// Register operand over AV_LdSt_32_Align2 with custom MC hooks: it is parsed
// through AV_LdSt_32_Align2_RegMatcher, printed by printAVLdSt32Align2RegOp,
// and encoded by getAVOperandEncoding.
def AV_LdSt_32_Align2_RegOp : RegisterOperand<AV_LdSt_32_Align2> {
1600+
let ParserMatchClass = AV_LdSt_32_Align2_RegMatcher;
1601+
let PrintMethod = "printAVLdSt32Align2RegOp";
1602+
let EncoderMethod = "getAVOperandEncoding";
1603+
}
1604+
15831605
//===----------------------------------------------------------------------===//
15841606
// ACSrc_* Operands with an AGPR or an inline constant
15851607
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)