Skip to content

Commit ea14834

Browse files
authored
[AMDGPU] Per-subtarget DPP instruction classification (#153096)
This is NFCI (no functional change intended) at this point.
1 parent b9ecee9 commit ea14834

File tree

11 files changed

+120
-34
lines changed

11 files changed

+120
-34
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5653,7 +5653,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
56535653
unsigned SplitSize = 32;
56545654
if (IID == Intrinsic::amdgcn_update_dpp && (Size % 64 == 0) &&
56555655
ST.hasDPALU_DPP() &&
5656-
AMDGPU::isLegalDPALU_DPPControl(MI.getOperand(4).getImm()))
5656+
AMDGPU::isLegalDPALU_DPPControl(ST, MI.getOperand(4).getImm()))
56575657
SplitSize = 64;
56585658

56595659
if (Size == SplitSize) {

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5052,11 +5052,13 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
50525052
if (DppCtrlIdx >= 0) {
50535053
unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
50545054

5055-
if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
5056-
AMDGPU::isDPALU_DPP(MII.get(Opc))) {
5057-
// DP ALU DPP is supported for row_newbcast only on GFX9*
5055+
if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5056+
AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
5057+
// DP ALU DPP supports only row_newbcast on GFX9* and only row_share
5058+
// on GFX12.
50585059
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5059-
Error(S, "DP ALU dpp only supports row_newbcast");
5060+
Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5061+
: "DP ALU dpp only supports row_newbcast");
50605062
return false;
50615063
}
50625064
}

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -549,11 +549,17 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
549549
return false;
550550
}
551551

552-
if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
553-
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
554-
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
555-
assert(DppCtrl && DppCtrl->isImm());
556-
if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
552+
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
553+
assert(DppCtrl && DppCtrl->isImm());
554+
unsigned DppCtrlVal = DppCtrl->getImm();
555+
if ((MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
556+
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp)) {
557+
if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP)) {
558+
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move is unsupported\n");
559+
// Split it.
560+
return false;
561+
}
562+
if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal)) {
557563
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
558564
" control value\n");
559565
// Let it split, then control may become legal.
@@ -709,6 +715,20 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
709715
break;
710716
}
711717

718+
if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP) &&
719+
AMDGPU::isDPALU_DPP32BitOpc(OrigOp)) {
720+
LLVM_DEBUG(dbgs() << " " << OrigMI
721+
<< " failed: DPP ALU DPP is not supported\n");
722+
break;
723+
}
724+
725+
if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal) &&
726+
AMDGPU::isDPALU_DPP(TII->get(OrigOp), *ST)) {
727+
LLVM_DEBUG(dbgs() << " " << OrigMI
728+
<< " failed: not valid 64-bit DPP control value\n");
729+
break;
730+
}
731+
712732
LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
713733
if (Use == Src0) {
714734
if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -976,8 +976,10 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
976976
unsigned Imm = MI->getOperand(OpNo).getImm();
977977
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
978978

979-
if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
980-
O << " /* DP ALU dpp only supports row_newbcast */";
979+
if (!AMDGPU::isLegalDPALU_DPPControl(STI, Imm) &&
980+
AMDGPU::isDPALU_DPP(Desc, STI)) {
981+
O << " /* DP ALU dpp only supports "
982+
<< (isGFX12(STI) ? "row_share" : "row_newbcast") << " */";
981983
return;
982984
}
983985
if (Imm <= DppCtrl::QUAD_PERM_LAST) {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6621,7 +6621,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
66216621
unsigned SplitSize = 32;
66226622
if (IID == Intrinsic::amdgcn_update_dpp && (ValSize % 64 == 0) &&
66236623
ST->hasDPALU_DPP() &&
6624-
AMDGPU::isLegalDPALU_DPPControl(N->getConstantOperandVal(3)))
6624+
AMDGPU::isLegalDPALU_DPPControl(*ST, N->getConstantOperandVal(3)))
66256625
SplitSize = 64;
66266626

66276627
auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2616,9 +2616,9 @@ std::pair<MachineInstr*, MachineInstr*>
26162616
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
26172617
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
26182618

2619-
if (ST.hasMovB64() &&
2619+
if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
26202620
AMDGPU::isLegalDPALU_DPPControl(
2621-
getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
2621+
ST, getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
26222622
MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
26232623
return std::pair(&MI, nullptr);
26242624
}
@@ -5433,7 +5433,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
54335433
}
54345434

54355435
if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5436-
!AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) {
5436+
!AMDGPU::isLegalDPALU_DPPControl(ST, DC) &&
5437+
AMDGPU::isDPALU_DPP(Desc, ST)) {
54375438
ErrInfo = "Invalid dpp_ctrl value: "
54385439
"DP ALU dpp only support row_newbcast";
54395440
return false;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
19541954
!eq(VT, v2f16) : VCSrc_v2f16,
19551955
!eq(VT, v2bf16) : VCSrc_v2bf16,
19561956
!eq(VT, f32) : VCSrc_f32,
1957+
!eq(VT, f64) : VCSrc_f64,
19571958
!eq(VT, v2i32) : VCSrc_v2b32,
19581959
1 : VCSrc_b32);
19591960
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3309,7 +3309,33 @@ bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
33093309
return false;
33103310
}
33113311

3312-
bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3312+
/// \returns true when \p Opc is one of the opcodes enumerated in the switch
/// below: the _e64, _e64_dpp and _e64_dpp_gfx1250 forms of V_MUL_LO_U32,
/// V_MUL_HI_U32, V_MUL_HI_I32 and V_MAD_U32. Any other opcode yields false.
/// The check looks at the opcode value only; it does not consult the
/// subtarget, so callers combine it with a feature test of their own.
bool isDPALU_DPP32BitOpc(unsigned Opc) {
3313+
switch (Opc) {
3314+
case AMDGPU::V_MUL_LO_U32_e64:
3315+
case AMDGPU::V_MUL_LO_U32_e64_dpp:
3316+
case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3317+
case AMDGPU::V_MUL_HI_U32_e64:
3318+
case AMDGPU::V_MUL_HI_U32_e64_dpp:
3319+
case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3320+
case AMDGPU::V_MUL_HI_I32_e64:
3321+
case AMDGPU::V_MUL_HI_I32_e64_dpp:
3322+
case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3323+
case AMDGPU::V_MAD_U32_e64:
3324+
case AMDGPU::V_MAD_U32_e64_dpp:
3325+
case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3326+
// All listed cases fall through to a single positive answer.
return true;
3327+
default:
3328+
return false;
3329+
}
3330+
}
3331+
3332+
/// \returns true if the instruction described by \p OpDesc counts as a DP ALU
/// DPP instruction on subtarget \p ST.
///
/// The decision is made in three steps:
///  1. Without FeatureDPALU_DPP on \p ST the answer is always false.
///  2. For opcodes accepted by isDPALU_DPP32BitOpc the answer is whether
///     \p ST has FeatureGFX1250Insts.
///  3. Otherwise the answer is hasAny64BitVGPROperands(OpDesc).
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST) {
3333+
// Subtargets lacking the feature have no DP ALU DPP instructions at all.
if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3334+
return false;
3335+
3336+
// These opcodes are classified by subtarget feature, not by operand width.
if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
3337+
return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3338+
33133339
// Everything else is classified by its operands.
return hasAny64BitVGPROperands(OpDesc);
33143340
}
33153341

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,15 +1750,22 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
17501750
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
17511751

17521752
LLVM_READNONE
1753-
inline bool isLegalDPALU_DPPControl(unsigned DC) {
1754-
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1753+
/// \returns true if the dpp_ctrl value \p DC is accepted for DP ALU DPP on
/// subtarget \p ST. The legal range depends on the subtarget:
///  - isGFX12(ST):  DPP::ROW_SHARE_FIRST .. DPP::ROW_SHARE_LAST (inclusive)
///  - isGFX90A(ST): DPP::ROW_NEWBCAST_FIRST .. DPP::ROW_NEWBCAST_LAST
///    (inclusive)
///  - any other subtarget: no control value is legal.
inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1754+
// The GFX12 test comes first, so it decides when both predicates would hold.
if (isGFX12(ST))
1755+
return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1756+
if (isGFX90A(ST))
1757+
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1758+
// Neither subtarget family matched: reject every control value.
return false;
17551759
}
17561760

17571761
/// \returns true if an instruction may have a 64-bit VGPR operand.
17581762
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
17591763

1764+
/// \returns true if \p Opc is a DP ALU DPP opcode without any 64-bit operands.
1765+
bool isDPALU_DPP32BitOpc(unsigned Opc);
1766+
17601767
/// \returns true if an instruction is a DP ALU DPP.
1761-
bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1768+
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST);
17621769

17631770
/// \returns true if the intrinsic is divergent
17641771
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2084,6 +2084,9 @@ multiclass VOP3_Realtriple_gfx11_gfx12<bits<10> op> :
20842084
multiclass VOP3_Real_Base_gfx11_gfx12<bits<10> op> :
20852085
VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Gen, op>;
20862086

2087+
// Instantiates VOP3_Real_Base twice with the same opcode: once for GFX11Gen
// and once for GFX12Not12_50Gen. Used for instructions that receive a
// separate gfx1250 real definition elsewhere in this file.
// NOTE(review): GFX12Not12_50Gen presumably selects GFX12 excluding GFX1250;
// confirm against its definition.
multiclass VOP3_Real_Base_gfx11_gfx12_not_gfx1250<bits<10> op> :
2088+
VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Not12_50Gen, op>;
2089+
20872090
multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
20882091
string asmName> :
20892092
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
@@ -2211,9 +2214,9 @@ defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
22112214
defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
22122215
defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
22132216
defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>;
2214-
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12<0x32c>;
2215-
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12<0x32d>;
2216-
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12<0x32e>;
2217+
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32c>;
2218+
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32d>;
2219+
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32e>;
22172220
defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32f>;
22182221
defm V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
22192222
defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
@@ -2242,6 +2245,10 @@ let AssemblerPredicate = isGFX11Plus in {
22422245
}
22432246

22442247
// These instructions differ from GFX12 variant by supporting DPP:
2248+
defm V_MUL_LO_U32 : VOP3Only_Realtriple_gfx1250<0x32c>;
2249+
defm V_MUL_HI_U32 : VOP3Only_Realtriple_gfx1250<0x32d>;
2250+
defm V_MUL_HI_I32 : VOP3Only_Realtriple_gfx1250<0x32e>;
2251+
22452252
defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>;
22462253
defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>;
22472254
defm V_PERM_PK16_B8_U4 : VOP3Only_Real_Base_gfx1250<0x243>;

0 commit comments

Comments
 (0)