1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)

add_public_tablegen_target(NVPTXCommonTableGen)
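For context (not shown in this diff): the output of -gen-sd-node-info is normally consumed by a small target SelectionDAGInfo wrapper. A rough sketch of that pattern follows; the class, macro, and file names are assumptions based on how other targets use the generated file, not contents of this PR.

// NVPTXSelectionDAGInfo.h (sketch)
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"

#define GET_SDNODE_ENUM
#include "NVPTXGenSDNodeInfo.inc" // assumed to provide the NVPTXISD opcode enum

namespace llvm {
class NVPTXSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
  NVPTXSelectionDAGInfo();
  ~NVPTXSelectionDAGInfo() override;
};
} // namespace llvm

// NVPTXSelectionDAGInfo.cpp (sketch)
#define GET_SDNODE_DESC
#include "NVPTXGenSDNodeInfo.inc" // assumed to provide the generated node descriptions

using namespace llvm;

NVPTXSelectionDAGInfo::NVPTXSelectionDAGInfo()
    : SelectionDAGGenTargetInfo(NVPTXGenSDNodeInfo) {}

NVPTXSelectionDAGInfo::~NVPTXSelectionDAGInfo() = default;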
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -17,6 +17,7 @@
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
106 changes: 8 additions & 98 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXISelDAGToDAG.h"
#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
@@ -1107,97 +1108,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
{MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {

#define MAKE_CASE(V) \
case V: \
return #V;

switch ((NVPTXISD::NodeType)Opcode) {
case NVPTXISD::FIRST_NUMBER:
break;

MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
MAKE_CASE(NVPTXISD::RET_GLUE)
MAKE_CASE(NVPTXISD::DeclareArrayParam)
MAKE_CASE(NVPTXISD::DeclareScalarParam)
MAKE_CASE(NVPTXISD::CALL)
MAKE_CASE(NVPTXISD::MoveParam)
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
MAKE_CASE(NVPTXISD::CallPrototype)
MAKE_CASE(NVPTXISD::ProxyReg)
MAKE_CASE(NVPTXISD::LoadV2)
MAKE_CASE(NVPTXISD::LoadV4)
MAKE_CASE(NVPTXISD::LoadV8)
MAKE_CASE(NVPTXISD::LDUV2)
MAKE_CASE(NVPTXISD::LDUV4)
MAKE_CASE(NVPTXISD::StoreV2)
MAKE_CASE(NVPTXISD::StoreV4)
MAKE_CASE(NVPTXISD::StoreV8)
MAKE_CASE(NVPTXISD::FSHL_CLAMP)
MAKE_CASE(NVPTXISD::FSHR_CLAMP)
MAKE_CASE(NVPTXISD::BFI)
MAKE_CASE(NVPTXISD::PRMT)
MAKE_CASE(NVPTXISD::FCOPYSIGN)
MAKE_CASE(NVPTXISD::FMAXNUM3)
MAKE_CASE(NVPTXISD::FMINNUM3)
MAKE_CASE(NVPTXISD::FMAXIMUM3)
MAKE_CASE(NVPTXISD::FMINIMUM3)
MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
MAKE_CASE(NVPTXISD::STACKRESTORE)
MAKE_CASE(NVPTXISD::STACKSAVE)
MAKE_CASE(NVPTXISD::SETP_F16X2)
MAKE_CASE(NVPTXISD::SETP_BF16X2)
MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
MAKE_CASE(NVPTXISD::BrxEnd)
MAKE_CASE(NVPTXISD::BrxItem)
MAKE_CASE(NVPTXISD::BrxStart)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
}
return nullptr;

#undef MAKE_CASE
}

TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
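The large MAKE_CASE table deleted above is the point of this hunk: node names are now generated by TableGen from the SDNode definitions in the .td files instead of being maintained by hand. A tiny illustration of where those names are consumed, assuming the usual debug-printing path (not code from this PR):

// SDNode::getOperationName() resolves target opcodes to strings; with
// -gen-sd-node-info the string presumably comes from the generated description
// table rather than from a hand-written getTargetNodeName() switch.
std::string Name = N->getOperationName(&DAG); // N is an SDNode*, DAG the SelectionDAG
// expected output for a vector load node: something like "NVPTXISD::LoadV4" (assumed spelling)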
@@ -2032,7 +1942,7 @@ static ISD::NodeType getScalarOpcodeForReduction(unsigned ReductionOpcode) {
}

/// Get 3-input scalar reduction opcode
static std::optional<NVPTXISD::NodeType>
static std::optional<unsigned>
getScalar3OpcodeForReduction(unsigned ReductionOpcode) {
switch (ReductionOpcode) {
case ISD::VECREDUCE_FMAX:
@@ -2931,7 +2841,7 @@ static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
using NVPTX::PTXCvtMode::CvtMode;

auto [OpCode, RetTy, CvtModeFlag] =
[&]() -> std::tuple<NVPTXISD::NodeType, MVT::SimpleValueType, uint32_t> {
[&]() -> std::tuple<unsigned, MVT::SimpleValueType, uint32_t> {
switch (IntrinsicID) {
case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8,
@@ -3314,7 +3224,7 @@ SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
// Generate BrxEnd nodes
SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
IdV, Chain.getValue(1)};
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);

return BrxEnd;
}
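A small behavioral tweak rides along with the migration: BrxEnd is now created with a single MVT::Other result. A sketch of the likely reasoning, assuming the .td definition of the node declares only a chain result that the generated description can now verify:

// Before (conceptually): a VT list that also carried a glue result.
//   SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
// After: the node produces just a chain, matching its declared type profile.
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);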
@@ -5457,7 +5367,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(LD);

// the new opcode after we double the number of operands
NVPTXISD::NodeType Opcode;
unsigned Opcode;
SmallVector<SDValue> Operands(LD->ops());
unsigned OldNumOutputs; // non-glue, non-chain outputs
switch (LD->getOpcode()) {
@@ -5540,7 +5450,7 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
auto *ST = cast<MemSDNode>(N);

// The new opcode after we double the number of operands.
NVPTXISD::NodeType Opcode;
unsigned Opcode;
switch (N->getOpcode()) {
case ISD::STORE:
// Any packed type is legal, so the legalizer will not have lowered
@@ -5675,7 +5585,7 @@ static SDValue PerformFADDCombine(SDNode *N,
}

/// Get 3-input version of a 2-input min/max opcode
static NVPTXISD::NodeType getMinMax3Opcode(unsigned MinMax2Opcode) {
static unsigned getMinMax3Opcode(unsigned MinMax2Opcode) {
switch (MinMax2Opcode) {
case ISD::FMAXNUM:
case ISD::FMAXIMUMNUM:
@@ -5706,7 +5616,7 @@ static SDValue PerformFMinMaxCombine(SDNode *N,
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
unsigned MinMaxOp2 = N->getOpcode();
NVPTXISD::NodeType MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
unsigned MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);

if (Op0.getOpcode() == MinMaxOp2 && Op0.hasOneUse()) {
// (maxnum (maxnum a, b), c) -> (maxnum3 a, b, c)
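A recurring edit in this file: helpers that previously returned NVPTXISD::NodeType now use plain unsigned opcodes, since the target enum is generated elsewhere and these functions mix ISD::* and NVPTXISD::* values anyway. A minimal sketch of the pattern, with the opcode mapping inferred from the surrounding cases rather than copied from the PR:

// Map a 2-input min/max opcode to the 3-input NVPTX node, returning a raw
// opcode value instead of the (now TableGen-generated) NVPTXISD enum type.
static unsigned getMinMax3OpcodeSketch(unsigned MinMax2Opcode) {
  switch (MinMax2Opcode) {
  case ISD::FMAXNUM:
  case ISD::FMAXIMUMNUM:
    return NVPTXISD::FMAXNUM3;
  case ISD::FMINNUM:
  case ISD::FMINIMUMNUM:
    return NVPTXISD::FMINNUM3;
  case ISD::FMAXIMUM:
    return NVPTXISD::FMAXIMUM3;
  case ISD::FMINIMUM:
    return NVPTXISD::FMINIMUM3;
  default:
    llvm_unreachable("unsupported min/max opcode");
  }
}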
114 changes: 0 additions & 114 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -20,118 +20,6 @@
#include "llvm/Support/AtomicOrdering.h"

namespace llvm {
namespace NVPTXISD {
enum NodeType : unsigned {
// Start the numbering from where ISD NodeType finishes.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
RET_GLUE,

/// These nodes represent a parameter declaration. In PTX this will look like:
/// .param .align 16 .b8 param0[1024];
/// .param .b32 retval0;
///
/// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
/// DeclareScalarParam(Chain, Externalsym, Size, Glue)
DeclareScalarParam,
DeclareArrayParam,

/// This node represents a PTX call instruction. It's operands are as follows:
///
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
/// NumParams, Callee, Proto)
CALL,

MoveParam,
CallPrototype,
ProxyReg,
FSHL_CLAMP,
FSHR_CLAMP,
MUL_WIDE_SIGNED,
MUL_WIDE_UNSIGNED,
SETP_F16X2,
SETP_BF16X2,
BFI,
PRMT,

/// This node is similar to ISD::BUILD_VECTOR except that the output may be
/// implicitly bitcast to a scalar. This allows for the representation of
/// packing move instructions for vector types which are not legal i.e. v2i32
BUILD_VECTOR,

/// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
/// which may be a scalar and unpacks it into multiple values by implicitly
/// converting it to a vector.
UNPACK_VECTOR,

FCOPYSIGN,
FMAXNUM3,
FMINNUM3,
FMAXIMUM3,
FMINIMUM3,

DYNAMIC_STACKALLOC,
STACKRESTORE,
STACKSAVE,
BrxStart,
BrxItem,
BrxEnd,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
CVT_E4M3X4_F32X4_RS_SF,
CVT_E5M2X4_F32X4_RS_SF,
CVT_E2M3X4_F32X4_RS_SF,
CVT_E3M2X4_F32X4_RS_SF,
CVT_E2M1X4_F32X4_RS_SF,

FIRST_MEMORY_OPCODE,

/// These nodes are used to lower atomic instructions with i128 type. They are
/// similar to the generic nodes, but the input and output values are split
/// into two 64-bit values.
/// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
/// swapLo, swapHi)
/// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
ATOMIC_SWAP_B128,

LoadV2,
LoadV4,
LoadV8,
LDUV2, // LDU.v2
LDUV4, // LDU.v4
StoreV2,
StoreV4,
StoreV8,
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
LAST_MEMORY_OPCODE =
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
};
}

class NVPTXSubtarget;

@@ -144,8 +32,6 @@ class NVPTXTargetLowering : public TargetLowering {
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

const char *getTargetNodeName(unsigned Opcode) const override;

bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
13 changes: 11 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1686,13 +1686,19 @@ def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]

def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;


// These nodes represent a parameter declaration. In PTX this will look like:
// .param .align 16 .b8 param0[1024];
// .param .b32 retval0;
//
// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
// DeclareScalarParam(Chain, Externalsym, Size, Glue)
def declare_array_param :
SDNode<"NVPTXISD::DeclareArrayParam", SDTDeclareArrayParam,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def declare_scalar_param :
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def proxy_reg :
@@ -1754,7 +1760,7 @@ def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype :
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
[SDNPHasChain, SDNPSideEffect]>;
def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
def CALL_PROTOTYPE :
NVPTXInst<(outs), (ins ProtoIdent:$ident),
@@ -2181,6 +2187,9 @@ foreach vt = [v2f32, v2i32] in {
def: Pat<(v2i16 (scalar_to_vector i16:$a)),
(CVT_u32_u16 $a, CvtNONE)>;

// This node is similar to ISD::BUILD_VECTOR except that the output may be
// implicitly bitcast to a scalar. This allows for the representation of
// packing move instructions for vector types which are not legal i.e. v2i32
def nvptx_build_vector : SDNode<"NVPTXISD::BUILD_VECTOR", SDTypeProfile<1, 2, []>, []>;

def : Pat<(i64 (nvptx_build_vector i32:$a, i32:$b)),
2 changes: 1 addition & 1 deletion llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -5625,7 +5625,7 @@ class Tcgen05MMADisableOutputLaneSDNode<bit Sp, string ASpace,
# "_DISABLE_OUTPUT_LANE_CG" # CtaGroup
# !if(!eq(AShift, 1), "_ASHIFT", ""),
Tcgen05MMADisableOutputLaneTypeProfile<Sp, ASpace, CtaGroup, ScaleInput>,
[SDNPHasChain, SDNPSideEffect]>;
[SDNPHasChain, SDNPSideEffect, SDNPMemOperand]>;
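The added SDNPMemOperand flag marks these tcgen05 MMA nodes as memory-accessing, which the generated node descriptions presumably require for opcodes in the target's memory-opcode range (the removed enum placed them between FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE). Illustrative sketch only; the operands, MemVT, and MMO below are placeholders, not the PR's lowering code:

// Nodes flagged SDNPMemOperand are built as memory intrinsic nodes so that they
// carry a MachineMemOperand describing the tensor-memory access.
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Node = DAG.getMemIntrinsicNode(
    NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1, DL, VTs, Ops,
    MemVT, MMO); // MMO: assumed MachineMemOperand for the access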

class Tcgen05MMADisableOutputLaneInst<bit Sp, string ASpace,
string Kind, int CtaGroup, string CollectorUsageStr,