Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3080,9 +3080,38 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);

const unsigned Opc = gwsIntrinToOpcode(IntrID);

const MCInstrDesc &InstrDesc = TII->get(Opc);
int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

const TargetRegisterClass *DataRC = TII->getRegClass(InstrDesc, Data0Idx);

SmallVector<SDValue, 5> Ops;
if (HasVSrc)
Ops.push_back(N->getOperand(2));
if (HasVSrc) {
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

SDValue Data = N->getOperand(2);
MVT DataVT = Data.getValueType().getSimpleVT();
if (TRI->isTypeLegalForClass(*DataRC, DataVT)) {
// Normal 32-bit case.
Ops.push_back(N->getOperand(2));
} else {
// Operand is really 32-bits, but requires 64-bit alignment, so use the
// even aligned 64-bit register class.
const SDValue RegSeqOps[] = {
CurDAG->getTargetConstant(DataRC->getID(), SL, MVT::i32), Data,
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
0),
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};

Ops.push_back(SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
SL, MVT::v2i32, RegSeqOps),
0));
}
}

Ops.push_back(OffsetField);
Ops.push_back(Chain);

Expand Down
48 changes: 40 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1946,20 +1946,52 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
// The resource id offset is computed as (<isa opaque base> + M0[21:16] +
// offset field) % 64. Some versions of the programming guide omit the m0
// part, or claim it's from offset 0.
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID)));

unsigned Opc = gwsIntrinToOpcode(IID);
const MCInstrDesc &InstrDesc = TII.get(Opc);

if (HasVSrc) {
Register VSrc = MI.getOperand(1).getReg();
MIB.addReg(VSrc);

if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
return false;
}
int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
const TargetRegisterClass *SubRC =
TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);

if (!SubRC) {
// 32-bit normal case.
if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
return false;

MIB.addImm(ImmOffset)
.cloneMemRefs(MI);
BuildMI(*MBB, &MI, DL, InstrDesc)
.addReg(VSrc)
.addImm(ImmOffset)
.cloneMemRefs(MI);
} else {
// Requires even register alignment, so create 64-bit value and pad the
// top half with undef.
Register DataReg = MRI->createVirtualRegister(DataRC);
if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
return false;

TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);
Register UndefReg = MRI->createVirtualRegister(SubRC);
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), DataReg)
.addReg(VSrc)
.addImm(AMDGPU::sub0)
.addReg(UndefReg)
.addImm(AMDGPU::sub1);

BuildMI(*MBB, &MI, DL, InstrDesc)
.addReg(DataReg)
.addImm(ImmOffset)
.cloneMemRefs(MI);
}
} else {
BuildMI(*MBB, &MI, DL, InstrDesc)
.addImm(ImmOffset)
.cloneMemRefs(MI);
}

MI.eraseFromParent();
return true;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
}

// Predicate for the GWS data0 operand: the parsed operand must be a plain
// 32-bit VGPR or a plain 32-bit AGPR. Alignment handling happens later;
// the asm parser only accepts the 32-bit register classes here.
bool isAV_LdSt_32_Align2_RegOp() const {
  if (isRegClass(AMDGPU::VGPR_32RegClassID))
    return true;
  return isRegClass(AMDGPU::AGPR_32RegClassID);
}

bool isVRegWithInputMods() const;
template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
template <bool IsFake16> bool isT16VRegWithInputMods() const;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ class DS_GWS_0D <string opName>

class DS_GWS_1D <string opName>
: DS_GWS<opName,
(ins AVLdSt_32:$data0, Offset:$offset),
(ins AV_LdSt_32_Align2_RegOp:$data0, Offset:$offset),
" $data0$offset gds"> {

let has_gws_data0 = 1;
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,18 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
printRegularOperand(MI, OpNo, STI, O);
}

// Prints the data0 operand of DS_GWS instructions. The operand is logically
// 32 bits wide, but on targets with an even VGPR-alignment requirement it is
// carried in an even-aligned 64-bit register tuple; only the low half holds
// the data.
void AMDGPUInstPrinter::printAVLdSt32Align2RegOp(const MCInst *MI,
                                                 unsigned OpNo,
                                                 const MCSubtargetInfo &STI,
                                                 raw_ostream &O) {
  MCRegister Reg = MI->getOperand(OpNo).getReg();

  // On targets with an even alignment requirement the operand is a 64-bit
  // register pair; print just its low (sub0) 32-bit subregister. If Reg has
  // no sub0, it is already a plain 32-bit register and is printed as-is.
  if (MCRegister SubReg = MRI.getSubReg(Reg, AMDGPU::sub0))
    Reg = SubReg;
  printRegOperand(Reg, O, MRI);
}

void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O);
void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printAVLdSt32Align2RegOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);

void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6429,8 +6429,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case AMDGPU::DS_GWS_INIT:
case AMDGPU::DS_GWS_SEMA_BR:
case AMDGPU::DS_GWS_BARRIER:
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0);
[[fallthrough]];
case AMDGPU::DS_GWS_SEMA_V:
case AMDGPU::DS_GWS_SEMA_P:
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1657,6 +1657,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {

const TargetSchedModel &getSchedModel() const { return SchedModel; }

// FIXME: This should be removed
// Enforce operand's \p OpName even alignment if required by target.
// This is used if an operand is a 32 bit register but needs to be aligned
// regardless.
Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,17 @@ def VS_64_AlignTarget : SIRegisterClassLike<64, true, false, true>,
let DecoderMethod = "decodeSrcRegOrImm9";
}


// Special case for DS_GWS instructions. The register input is really
// 32-bit, but it needs to be even aligned on targets with a VGPR
// alignment requirement.
//
// The two parallel lists below map each HwMode to the register class the
// operand actually uses: a plain VGPR_32 where no alignment is required,
// and an even-aligned 64-bit class (data lives in sub0) where it is.
// NOTE(review): the class list must stay in the same order as the mode list.
def AV_LdSt_32_Align2 : SIRegisterClassLike</*Bitwidth=*/32, /*VGPR=*/true, /*AGPR=*/true>,
RegClassByHwMode<
[DefaultMode_Wave64, DefaultMode_Wave32, AVAlign2LoadStoreMode, AlignedVGPRNoAGPRMode_Wave64, AlignedVGPRNoAGPRMode_Wave32],
[VGPR_32, VGPR_32, AV_64_Align2, VReg_64_Align2, VReg_64_Align2]> {
let DecoderMethod = "decodeAVLdSt<32>";
}

class RegImmMatcher<string name> : AsmOperandClass {
let Name = name;
let RenderMethod = "addRegOrImmOperands";
Expand Down Expand Up @@ -1580,6 +1591,17 @@ foreach size = ["64", "96", "128", "160", "256", "1024" ] in {
def AVLdSt_#size#_Align2 : AVLdStOperand<!cast<RegisterClassLike>("AV_LdSt_"#size#_Align2)>;
}

// Asm matcher class for the GWS data0 operand. By AsmOperandClass
// convention, Name selects the parser predicate is<Name>() — here
// isAV_LdSt_32_Align2_RegOp, which accepts a 32-bit VGPR or AGPR.
def AV_LdSt_32_Align2_RegMatcher : AsmOperandClass {
let Name = "AV_LdSt_32_Align2_RegOp";
let RenderMethod = "addRegOperands";
}

def AV_LdSt_32_Align2_RegOp : RegisterOperand<AV_LdSt_32_Align2> {
// NOTE(review, from a PR contributor): "I wonder if I could do something
// similar to the pseudo register class that I added for downstream support,
// such that I don't need to do a check on register class?"

let ParserMatchClass = AV_LdSt_32_Align2_RegMatcher;
let PrintMethod = "printAVLdSt32Align2RegOp";
let EncoderMethod = "getAVOperandEncoding";
}

//===----------------------------------------------------------------------===//
// ACSrc_* Operands with an AGPR or an inline constant
//===----------------------------------------------------------------------===//
Expand Down
Loading
Loading