Skip to content

Commit e47e9f3

Browse files
authored
[NVPTX] TableGen-erate SDNode descriptions (#168367)
This allows SDNodes to be validated against their expected type profiles and reduces the number of changes required to add a new node. The verification functionality detected a few issues. Two of them were fixed (missing `SDNPMemOperand` property on `TCGEN05_MMA` nodes and extra glue operand/result on `CallPrototype`); the remaining one is with the `ProxyReg` node, see `NVPTXSelectionDAGInfo::verifyTargetNode()`. Part of #119709. Pull Request: #168367
1 parent db71cc5 commit e47e9f3

File tree

8 files changed

+114
-219
lines changed

8 files changed

+114
-219
lines changed

llvm/lib/Target/NVPTX/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
66
tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
77
tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
88
tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
9+
tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info)
910
tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
1011

1112
add_public_tablegen_target(NVPTXCommonTableGen)

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "NVPTX.h"
1818
#include "NVPTXISelLowering.h"
1919
#include "NVPTXRegisterInfo.h"
20+
#include "NVPTXSelectionDAGInfo.h"
2021
#include "NVPTXTargetMachine.h"
2122
#include "llvm/ADT/MapVector.h"
2223
#include "llvm/CodeGen/SelectionDAGISel.h"

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 8 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "MCTargetDesc/NVPTXBaseInfo.h"
1616
#include "NVPTX.h"
1717
#include "NVPTXISelDAGToDAG.h"
18+
#include "NVPTXSelectionDAGInfo.h"
1819
#include "NVPTXSubtarget.h"
1920
#include "NVPTXTargetMachine.h"
2021
#include "NVPTXTargetObjectFile.h"
@@ -1107,97 +1108,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
11071108
{MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
11081109
}
11091110

1110-
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
1111-
1112-
#define MAKE_CASE(V) \
1113-
case V: \
1114-
return #V;
1115-
1116-
switch ((NVPTXISD::NodeType)Opcode) {
1117-
case NVPTXISD::FIRST_NUMBER:
1118-
break;
1119-
1120-
MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
1121-
MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
1122-
MAKE_CASE(NVPTXISD::RET_GLUE)
1123-
MAKE_CASE(NVPTXISD::DeclareArrayParam)
1124-
MAKE_CASE(NVPTXISD::DeclareScalarParam)
1125-
MAKE_CASE(NVPTXISD::CALL)
1126-
MAKE_CASE(NVPTXISD::MoveParam)
1127-
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
1128-
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
1129-
MAKE_CASE(NVPTXISD::CallPrototype)
1130-
MAKE_CASE(NVPTXISD::ProxyReg)
1131-
MAKE_CASE(NVPTXISD::LoadV2)
1132-
MAKE_CASE(NVPTXISD::LoadV4)
1133-
MAKE_CASE(NVPTXISD::LoadV8)
1134-
MAKE_CASE(NVPTXISD::LDUV2)
1135-
MAKE_CASE(NVPTXISD::LDUV4)
1136-
MAKE_CASE(NVPTXISD::StoreV2)
1137-
MAKE_CASE(NVPTXISD::StoreV4)
1138-
MAKE_CASE(NVPTXISD::StoreV8)
1139-
MAKE_CASE(NVPTXISD::FSHL_CLAMP)
1140-
MAKE_CASE(NVPTXISD::FSHR_CLAMP)
1141-
MAKE_CASE(NVPTXISD::BFI)
1142-
MAKE_CASE(NVPTXISD::PRMT)
1143-
MAKE_CASE(NVPTXISD::FCOPYSIGN)
1144-
MAKE_CASE(NVPTXISD::FMAXNUM3)
1145-
MAKE_CASE(NVPTXISD::FMINNUM3)
1146-
MAKE_CASE(NVPTXISD::FMAXIMUM3)
1147-
MAKE_CASE(NVPTXISD::FMINIMUM3)
1148-
MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
1149-
MAKE_CASE(NVPTXISD::STACKRESTORE)
1150-
MAKE_CASE(NVPTXISD::STACKSAVE)
1151-
MAKE_CASE(NVPTXISD::SETP_F16X2)
1152-
MAKE_CASE(NVPTXISD::SETP_BF16X2)
1153-
MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
1154-
MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
1155-
MAKE_CASE(NVPTXISD::BrxEnd)
1156-
MAKE_CASE(NVPTXISD::BrxItem)
1157-
MAKE_CASE(NVPTXISD::BrxStart)
1158-
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED)
1159-
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
1160-
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
1161-
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
1162-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1)
1163-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2)
1164-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
1165-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
1166-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1)
1167-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2)
1168-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
1169-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
1170-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
1171-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
1172-
MAKE_CASE(
1173-
NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
1174-
MAKE_CASE(
1175-
NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
1176-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1)
1177-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2)
1178-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
1179-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
1180-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1)
1181-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2)
1182-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
1183-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
1184-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
1185-
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
1186-
MAKE_CASE(
1187-
NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
1188-
MAKE_CASE(
1189-
NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
1190-
MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
1191-
MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
1192-
MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
1193-
MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
1194-
MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
1195-
}
1196-
return nullptr;
1197-
1198-
#undef MAKE_CASE
1199-
}
1200-
12011111
TargetLoweringBase::LegalizeTypeAction
12021112
NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
12031113
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
@@ -2032,7 +1942,7 @@ static ISD::NodeType getScalarOpcodeForReduction(unsigned ReductionOpcode) {
20321942
}
20331943

20341944
/// Get 3-input scalar reduction opcode
2035-
static std::optional<NVPTXISD::NodeType>
1945+
static std::optional<unsigned>
20361946
getScalar3OpcodeForReduction(unsigned ReductionOpcode) {
20371947
switch (ReductionOpcode) {
20381948
case ISD::VECREDUCE_FMAX:
@@ -2931,7 +2841,7 @@ static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
29312841
using NVPTX::PTXCvtMode::CvtMode;
29322842

29332843
auto [OpCode, RetTy, CvtModeFlag] =
2934-
[&]() -> std::tuple<NVPTXISD::NodeType, MVT::SimpleValueType, uint32_t> {
2844+
[&]() -> std::tuple<unsigned, MVT::SimpleValueType, uint32_t> {
29352845
switch (IntrinsicID) {
29362846
case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
29372847
return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8,
@@ -3314,7 +3224,7 @@ SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
33143224
// Generate BrxEnd nodes
33153225
SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
33163226
IdV, Chain.getValue(1)};
3317-
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
3227+
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);
33183228

33193229
return BrxEnd;
33203230
}
@@ -5457,7 +5367,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
54575367
SDLoc DL(LD);
54585368

54595369
// the new opcode after we double the number of operands
5460-
NVPTXISD::NodeType Opcode;
5370+
unsigned Opcode;
54615371
SmallVector<SDValue> Operands(LD->ops());
54625372
unsigned OldNumOutputs; // non-glue, non-chain outputs
54635373
switch (LD->getOpcode()) {
@@ -5540,7 +5450,7 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
55405450
auto *ST = cast<MemSDNode>(N);
55415451

55425452
// The new opcode after we double the number of operands.
5543-
NVPTXISD::NodeType Opcode;
5453+
unsigned Opcode;
55445454
switch (N->getOpcode()) {
55455455
case ISD::STORE:
55465456
// Any packed type is legal, so the legalizer will not have lowered
@@ -5675,7 +5585,7 @@ static SDValue PerformFADDCombine(SDNode *N,
56755585
}
56765586

56775587
/// Get 3-input version of a 2-input min/max opcode
5678-
static NVPTXISD::NodeType getMinMax3Opcode(unsigned MinMax2Opcode) {
5588+
static unsigned getMinMax3Opcode(unsigned MinMax2Opcode) {
56795589
switch (MinMax2Opcode) {
56805590
case ISD::FMAXNUM:
56815591
case ISD::FMAXIMUMNUM:
@@ -5706,7 +5616,7 @@ static SDValue PerformFMinMaxCombine(SDNode *N,
57065616
SDValue Op0 = N->getOperand(0);
57075617
SDValue Op1 = N->getOperand(1);
57085618
unsigned MinMaxOp2 = N->getOpcode();
5709-
NVPTXISD::NodeType MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
5619+
unsigned MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
57105620

57115621
if (Op0.getOpcode() == MinMaxOp2 && Op0.hasOneUse()) {
57125622
// (maxnum (maxnum a, b), c) -> (maxnum3 a, b, c)

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -20,118 +20,6 @@
2020
#include "llvm/Support/AtomicOrdering.h"
2121

2222
namespace llvm {
23-
namespace NVPTXISD {
24-
enum NodeType : unsigned {
25-
// Start the numbering from where ISD NodeType finishes.
26-
FIRST_NUMBER = ISD::BUILTIN_OP_END,
27-
RET_GLUE,
28-
29-
/// These nodes represent a parameter declaration. In PTX this will look like:
30-
/// .param .align 16 .b8 param0[1024];
31-
/// .param .b32 retval0;
32-
///
33-
/// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
34-
/// DeclareScalarParam(Chain, Externalsym, Size, Glue)
35-
DeclareScalarParam,
36-
DeclareArrayParam,
37-
38-
/// This node represents a PTX call instruction. It's operands are as follows:
39-
///
40-
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
41-
/// NumParams, Callee, Proto)
42-
CALL,
43-
44-
MoveParam,
45-
CallPrototype,
46-
ProxyReg,
47-
FSHL_CLAMP,
48-
FSHR_CLAMP,
49-
MUL_WIDE_SIGNED,
50-
MUL_WIDE_UNSIGNED,
51-
SETP_F16X2,
52-
SETP_BF16X2,
53-
BFI,
54-
PRMT,
55-
56-
/// This node is similar to ISD::BUILD_VECTOR except that the output may be
57-
/// implicitly bitcast to a scalar. This allows for the representation of
58-
/// packing move instructions for vector types which are not legal i.e. v2i32
59-
BUILD_VECTOR,
60-
61-
/// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
62-
/// which may be a scalar and unpacks it into multiple values by implicitly
63-
/// converting it to a vector.
64-
UNPACK_VECTOR,
65-
66-
FCOPYSIGN,
67-
FMAXNUM3,
68-
FMINNUM3,
69-
FMAXIMUM3,
70-
FMINIMUM3,
71-
72-
DYNAMIC_STACKALLOC,
73-
STACKRESTORE,
74-
STACKSAVE,
75-
BrxStart,
76-
BrxItem,
77-
BrxEnd,
78-
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED,
79-
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
80-
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
81-
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
82-
CVT_E4M3X4_F32X4_RS_SF,
83-
CVT_E5M2X4_F32X4_RS_SF,
84-
CVT_E2M3X4_F32X4_RS_SF,
85-
CVT_E3M2X4_F32X4_RS_SF,
86-
CVT_E2M1X4_F32X4_RS_SF,
87-
88-
FIRST_MEMORY_OPCODE,
89-
90-
/// These nodes are used to lower atomic instructions with i128 type. They are
91-
/// similar to the generic nodes, but the input and output values are split
92-
/// into two 64-bit values.
93-
/// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
94-
/// swapLo, swapHi)
95-
/// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
96-
ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
97-
ATOMIC_SWAP_B128,
98-
99-
LoadV2,
100-
LoadV4,
101-
LoadV8,
102-
LDUV2, // LDU.v2
103-
LDUV4, // LDU.v4
104-
StoreV2,
105-
StoreV4,
106-
StoreV8,
107-
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
108-
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
109-
TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
110-
TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
111-
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
112-
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
113-
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
114-
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
115-
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
116-
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
117-
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
118-
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
119-
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
120-
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
121-
TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
122-
TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
123-
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
124-
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
125-
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
126-
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
127-
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
128-
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
129-
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
130-
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
131-
LAST_MEMORY_OPCODE =
132-
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
133-
};
134-
}
13523

13624
class NVPTXSubtarget;
13725

@@ -144,8 +32,6 @@ class NVPTXTargetLowering : public TargetLowering {
14432
const NVPTXSubtarget &STI);
14533
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
14634

147-
const char *getTargetNodeName(unsigned Opcode) const override;
148-
14935
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
15036
MachineFunction &MF,
15137
unsigned Intrinsic) const override;

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1686,13 +1686,19 @@ def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]
16861686

16871687
def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
16881688

1689-
1689+
// These nodes represent a parameter declaration. In PTX this will look like:
1690+
// .param .align 16 .b8 param0[1024];
1691+
// .param .b32 retval0;
1692+
//
1693+
// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
1694+
// DeclareScalarParam(Chain, Externalsym, Size, Glue)
16901695
def declare_array_param :
16911696
SDNode<"NVPTXISD::DeclareArrayParam", SDTDeclareArrayParam,
16921697
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
16931698
def declare_scalar_param :
16941699
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
16951700
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1701+
16961702
def MoveParam :
16971703
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
16981704
def proxy_reg :
@@ -1754,7 +1760,7 @@ def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
17541760
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
17551761
def CallPrototype :
17561762
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
1757-
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1763+
[SDNPHasChain, SDNPSideEffect]>;
17581764
def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
17591765
def CALL_PROTOTYPE :
17601766
NVPTXInst<(outs), (ins ProtoIdent:$ident),
@@ -2181,6 +2187,9 @@ foreach vt = [v2f32, v2i32] in {
21812187
def: Pat<(v2i16 (scalar_to_vector i16:$a)),
21822188
(CVT_u32_u16 $a, CvtNONE)>;
21832189

2190+
// This node is similar to ISD::BUILD_VECTOR except that the output may be
2191+
// implicitly bitcast to a scalar. This allows for the representation of
2192+
// packing move instructions for vector types which are not legal i.e. v2i32
21842193
def nvptx_build_vector : SDNode<"NVPTXISD::BUILD_VECTOR", SDTypeProfile<1, 2, []>, []>;
21852194

21862195
def : Pat<(i64 (nvptx_build_vector i32:$a, i32:$b)),

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5625,7 +5625,7 @@ class Tcgen05MMADisableOutputLaneSDNode<bit Sp, string ASpace,
56255625
# "_DISABLE_OUTPUT_LANE_CG" # CtaGroup
56265626
# !if(!eq(AShift, 1), "_ASHIFT", ""),
56275627
Tcgen05MMADisableOutputLaneTypeProfile<Sp, ASpace, CtaGroup, ScaleInput>,
5628-
[SDNPHasChain, SDNPSideEffect]>;
5628+
[SDNPHasChain, SDNPSideEffect, SDNPMemOperand]>;
56295629

56305630
class Tcgen05MMADisableOutputLaneInst<bit Sp, string ASpace,
56315631
string Kind, int CtaGroup, string CollectorUsageStr,

0 commit comments

Comments
 (0)