[AMDGPU] TableGen-erate SDNode descriptions #168248
@llvm/pr-subscribers-backend-amdgpu

Author: Sergei Barannikov (s-barannikov)

Changes

This allows SDNodes to be validated against their expected type profiles and reduces the number of changes required to add a new node.

Autogenerated node names start with "AMDGPUISD::", hence the changes in the tests.

The few nodes defined in R600.td are not imported because TableGen processes AMDGPU.td, which doesn't include R600.td. Ideally, we would have two sets of nodes, but that would require careful reorganization of the .td files since some nodes are shared between AMDGPU and R600. Not sure if it is something worth looking into.

Part of #119709.

Patch is 30.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168248.diff

13 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index c902b7e7f1d87..a86b75458923e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+#include "AMDGPUSelectionDAGInfo.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIModeRegisterDefaults.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3fbdab7ec4ed2..db890df7c50f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUMemoryUtils.h"
+#include "AMDGPUSelectionDAGInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
@@ -5650,169 +5651,6 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
return getImplicitParameterOffset(MFI->getExplicitKernArgSize(), Param);
}
-#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
-
-const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((AMDGPUISD::NodeType)Opcode) {
- case AMDGPUISD::FIRST_NUMBER: break;
- // AMDIL DAG nodes
- NODE_NAME_CASE(BRANCH_COND);
-
- // AMDGPU DAG nodes
- NODE_NAME_CASE(IF)
- NODE_NAME_CASE(ELSE)
- NODE_NAME_CASE(LOOP)
- NODE_NAME_CASE(CALL)
- NODE_NAME_CASE(TC_RETURN)
- NODE_NAME_CASE(TC_RETURN_GFX)
- NODE_NAME_CASE(TC_RETURN_GFX_WholeWave)
- NODE_NAME_CASE(TC_RETURN_CHAIN)
- NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR)
- NODE_NAME_CASE(TRAP)
- NODE_NAME_CASE(RET_GLUE)
- NODE_NAME_CASE(WAVE_ADDRESS)
- NODE_NAME_CASE(RETURN_TO_EPILOG)
- NODE_NAME_CASE(ENDPGM)
- NODE_NAME_CASE(ENDPGM_TRAP)
- NODE_NAME_CASE(SIMULATED_TRAP)
- NODE_NAME_CASE(DWORDADDR)
- NODE_NAME_CASE(FRACT)
- NODE_NAME_CASE(SETCC)
- NODE_NAME_CASE(DENORM_MODE)
- NODE_NAME_CASE(FMA_W_CHAIN)
- NODE_NAME_CASE(FMUL_W_CHAIN)
- NODE_NAME_CASE(CLAMP)
- NODE_NAME_CASE(COS_HW)
- NODE_NAME_CASE(SIN_HW)
- NODE_NAME_CASE(FMAX_LEGACY)
- NODE_NAME_CASE(FMIN_LEGACY)
- NODE_NAME_CASE(FMAX3)
- NODE_NAME_CASE(SMAX3)
- NODE_NAME_CASE(UMAX3)
- NODE_NAME_CASE(FMIN3)
- NODE_NAME_CASE(SMIN3)
- NODE_NAME_CASE(UMIN3)
- NODE_NAME_CASE(FMED3)
- NODE_NAME_CASE(SMED3)
- NODE_NAME_CASE(UMED3)
- NODE_NAME_CASE(FMAXIMUM3)
- NODE_NAME_CASE(FMINIMUM3)
- NODE_NAME_CASE(FDOT2)
- NODE_NAME_CASE(URECIP)
- NODE_NAME_CASE(DIV_SCALE)
- NODE_NAME_CASE(DIV_FMAS)
- NODE_NAME_CASE(DIV_FIXUP)
- NODE_NAME_CASE(FMAD_FTZ)
- NODE_NAME_CASE(RCP)
- NODE_NAME_CASE(RSQ)
- NODE_NAME_CASE(RCP_LEGACY)
- NODE_NAME_CASE(RCP_IFLAG)
- NODE_NAME_CASE(LOG)
- NODE_NAME_CASE(EXP)
- NODE_NAME_CASE(FMUL_LEGACY)
- NODE_NAME_CASE(RSQ_CLAMP)
- NODE_NAME_CASE(FP_CLASS)
- NODE_NAME_CASE(DOT4)
- NODE_NAME_CASE(CARRY)
- NODE_NAME_CASE(BORROW)
- NODE_NAME_CASE(BFE_U32)
- NODE_NAME_CASE(BFE_I32)
- NODE_NAME_CASE(BFI)
- NODE_NAME_CASE(BFM)
- NODE_NAME_CASE(FFBH_U32)
- NODE_NAME_CASE(FFBH_I32)
- NODE_NAME_CASE(FFBL_B32)
- NODE_NAME_CASE(MUL_U24)
- NODE_NAME_CASE(MUL_I24)
- NODE_NAME_CASE(MULHI_U24)
- NODE_NAME_CASE(MULHI_I24)
- NODE_NAME_CASE(MAD_U24)
- NODE_NAME_CASE(MAD_I24)
- NODE_NAME_CASE(MAD_I64_I32)
- NODE_NAME_CASE(MAD_U64_U32)
- NODE_NAME_CASE(PERM)
- NODE_NAME_CASE(TEXTURE_FETCH)
- NODE_NAME_CASE(R600_EXPORT)
- NODE_NAME_CASE(CONST_ADDRESS)
- NODE_NAME_CASE(REGISTER_LOAD)
- NODE_NAME_CASE(REGISTER_STORE)
- NODE_NAME_CASE(CVT_F32_UBYTE0)
- NODE_NAME_CASE(CVT_F32_UBYTE1)
- NODE_NAME_CASE(CVT_F32_UBYTE2)
- NODE_NAME_CASE(CVT_F32_UBYTE3)
- NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
- NODE_NAME_CASE(CVT_PKNORM_I16_F32)
- NODE_NAME_CASE(CVT_PKNORM_U16_F32)
- NODE_NAME_CASE(CVT_PK_I16_I32)
- NODE_NAME_CASE(CVT_PK_U16_U32)
- NODE_NAME_CASE(FP_TO_FP16)
- NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
- NODE_NAME_CASE(CONST_DATA_PTR)
- NODE_NAME_CASE(PC_ADD_REL_OFFSET)
- NODE_NAME_CASE(PC_ADD_REL_OFFSET64)
- NODE_NAME_CASE(LDS)
- NODE_NAME_CASE(DUMMY_CHAIN)
- NODE_NAME_CASE(LOAD_D16_HI)
- NODE_NAME_CASE(LOAD_D16_LO)
- NODE_NAME_CASE(LOAD_D16_HI_I8)
- NODE_NAME_CASE(LOAD_D16_HI_U8)
- NODE_NAME_CASE(LOAD_D16_LO_I8)
- NODE_NAME_CASE(LOAD_D16_LO_U8)
- NODE_NAME_CASE(STORE_MSKOR)
- NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
- NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
- NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
- NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
- NODE_NAME_CASE(DS_ORDERED_COUNT)
- NODE_NAME_CASE(ATOMIC_CMP_SWAP)
- NODE_NAME_CASE(BUFFER_LOAD)
- NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
- NODE_NAME_CASE(BUFFER_LOAD_USHORT)
- NODE_NAME_CASE(BUFFER_LOAD_BYTE)
- NODE_NAME_CASE(BUFFER_LOAD_SHORT)
- NODE_NAME_CASE(BUFFER_LOAD_TFE)
- NODE_NAME_CASE(BUFFER_LOAD_UBYTE_TFE)
- NODE_NAME_CASE(BUFFER_LOAD_USHORT_TFE)
- NODE_NAME_CASE(BUFFER_LOAD_BYTE_TFE)
- NODE_NAME_CASE(BUFFER_LOAD_SHORT_TFE)
- NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
- NODE_NAME_CASE(BUFFER_LOAD_FORMAT_TFE)
- NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
- NODE_NAME_CASE(SBUFFER_LOAD)
- NODE_NAME_CASE(SBUFFER_LOAD_BYTE)
- NODE_NAME_CASE(SBUFFER_LOAD_UBYTE)
- NODE_NAME_CASE(SBUFFER_LOAD_SHORT)
- NODE_NAME_CASE(SBUFFER_LOAD_USHORT)
- NODE_NAME_CASE(SBUFFER_PREFETCH_DATA)
- NODE_NAME_CASE(BUFFER_STORE)
- NODE_NAME_CASE(BUFFER_STORE_BYTE)
- NODE_NAME_CASE(BUFFER_STORE_SHORT)
- NODE_NAME_CASE(BUFFER_STORE_FORMAT)
- NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
- NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
- NODE_NAME_CASE(BUFFER_ATOMIC_ADD)
- NODE_NAME_CASE(BUFFER_ATOMIC_SUB)
- NODE_NAME_CASE(BUFFER_ATOMIC_SMIN)
- NODE_NAME_CASE(BUFFER_ATOMIC_UMIN)
- NODE_NAME_CASE(BUFFER_ATOMIC_SMAX)
- NODE_NAME_CASE(BUFFER_ATOMIC_UMAX)
- NODE_NAME_CASE(BUFFER_ATOMIC_AND)
- NODE_NAME_CASE(BUFFER_ATOMIC_OR)
- NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
- NODE_NAME_CASE(BUFFER_ATOMIC_INC)
- NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
- NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
- NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
- NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
- NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
- NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
- NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)
- NODE_NAME_CASE(WHOLE_WAVE_SETUP)
- NODE_NAME_CASE(WHOLE_WAVE_RETURN)
- }
- return nullptr;
-}
-
SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bdaf48652d107..473975133f5b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -280,8 +280,6 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue RHS, SDValue True, SDValue False,
SDValue CC, DAGCombinerInfo &DCI) const;
- const char* getTargetNodeName(unsigned Opcode) const override;
-
// FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
// AMDGPU. Commit r319036,
// (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
@@ -406,235 +404,6 @@ class AMDGPUTargetLowering : public TargetLowering {
}
};
-namespace AMDGPUISD {
-
-enum NodeType : unsigned {
- // AMDIL ISD Opcodes
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- BRANCH_COND,
- // End AMDIL ISD Opcodes
-
- // Function call.
- CALL,
- TC_RETURN,
- TC_RETURN_GFX,
- TC_RETURN_GFX_WholeWave,
- TC_RETURN_CHAIN,
- TC_RETURN_CHAIN_DVGPR,
- TRAP,
-
- // Masked control flow nodes.
- IF,
- ELSE,
- LOOP,
-
- // A uniform kernel return that terminates the wavefront.
- ENDPGM,
-
- // s_endpgm, but we may want to insert it in the middle of the block.
- ENDPGM_TRAP,
-
- // "s_trap 2" equivalent on hardware that does not support it.
- SIMULATED_TRAP,
-
- // Return to a shader part's epilog code.
- RETURN_TO_EPILOG,
-
- // Return with values from a non-entry function.
- RET_GLUE,
-
- // Convert a unswizzled wave uniform stack address to an address compatible
- // with a vector offset for use in stack access.
- WAVE_ADDRESS,
-
- DWORDADDR,
- FRACT,
-
- /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
- /// modifier behavior with dx10_enable.
- CLAMP,
-
- // This is SETCC with the full mask result which is used for a compare with a
- // result bit per item in the wavefront.
- SETCC,
-
- DENORM_MODE,
-
- // FP ops with input and output chain.
- FMA_W_CHAIN,
- FMUL_W_CHAIN,
-
- // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
- // Denormals handled on some parts.
- COS_HW,
- SIN_HW,
- FMAX_LEGACY,
- FMIN_LEGACY,
-
- FMAX3,
- SMAX3,
- UMAX3,
- FMIN3,
- SMIN3,
- UMIN3,
- FMED3,
- SMED3,
- UMED3,
- FMAXIMUM3,
- FMINIMUM3,
- FDOT2,
- URECIP,
- DIV_SCALE,
- DIV_FMAS,
- DIV_FIXUP,
- // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
- // treated as an illegal operation.
- FMAD_FTZ,
-
- // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
- // For f64, max error 2^29 ULP, handles denormals.
- RCP,
- RSQ,
- RCP_LEGACY,
- RCP_IFLAG,
-
- // log2, no denormal handling for f32.
- LOG,
-
- // exp2, no denormal handling for f32.
- EXP,
-
- FMUL_LEGACY,
- RSQ_CLAMP,
- FP_CLASS,
- DOT4,
- CARRY,
- BORROW,
- BFE_U32, // Extract range of bits with zero extension to 32-bits.
- BFE_I32, // Extract range of bits with sign extension to 32-bits.
- BFI, // (src0 & src1) | (~src0 & src2)
- BFM, // Insert a range of bits into a 32-bit word.
- FFBH_U32, // ctlz with -1 if input is zero.
- FFBH_I32,
- FFBL_B32, // cttz with -1 if input is zero.
- MUL_U24,
- MUL_I24,
- MULHI_U24,
- MULHI_I24,
- MAD_U24,
- MAD_I24,
- MAD_U64_U32,
- MAD_I64_I32,
- PERM,
- TEXTURE_FETCH,
- R600_EXPORT,
- CONST_ADDRESS,
- REGISTER_LOAD,
- REGISTER_STORE,
-
- // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
- CVT_F32_UBYTE0,
- CVT_F32_UBYTE1,
- CVT_F32_UBYTE2,
- CVT_F32_UBYTE3,
-
- // Convert two float 32 numbers into a single register holding two packed f16
- // with round to zero.
- CVT_PKRTZ_F16_F32,
- CVT_PKNORM_I16_F32,
- CVT_PKNORM_U16_F32,
- CVT_PK_I16_I32,
- CVT_PK_U16_U32,
-
- // Same as the standard node, except the high bits of the resulting integer
- // are known 0.
- FP_TO_FP16,
-
- /// This node is for VLIW targets and it is used to represent a vector
- /// that is stored in consecutive registers with the same channel.
- /// For example:
- /// |X |Y|Z|W|
- /// T0|v.x| | | |
- /// T1|v.y| | | |
- /// T2|v.z| | | |
- /// T3|v.w| | | |
- BUILD_VERTICAL_VECTOR,
- /// Pointer to the start of the shader's constant data.
- CONST_DATA_PTR,
- PC_ADD_REL_OFFSET,
- PC_ADD_REL_OFFSET64,
- LDS,
-
- DUMMY_CHAIN,
-
- FIRST_MEMORY_OPCODE,
- LOAD_D16_HI = FIRST_MEMORY_OPCODE,
- LOAD_D16_LO,
- LOAD_D16_HI_I8,
- LOAD_D16_HI_U8,
- LOAD_D16_LO_I8,
- LOAD_D16_LO_U8,
-
- STORE_MSKOR,
- TBUFFER_STORE_FORMAT,
- TBUFFER_STORE_FORMAT_D16,
- TBUFFER_LOAD_FORMAT,
- TBUFFER_LOAD_FORMAT_D16,
- DS_ORDERED_COUNT,
- ATOMIC_CMP_SWAP,
- BUFFER_LOAD,
- BUFFER_LOAD_UBYTE,
- BUFFER_LOAD_USHORT,
- BUFFER_LOAD_BYTE,
- BUFFER_LOAD_SHORT,
- BUFFER_LOAD_TFE,
- BUFFER_LOAD_UBYTE_TFE,
- BUFFER_LOAD_USHORT_TFE,
- BUFFER_LOAD_BYTE_TFE,
- BUFFER_LOAD_SHORT_TFE,
- BUFFER_LOAD_FORMAT,
- BUFFER_LOAD_FORMAT_TFE,
- BUFFER_LOAD_FORMAT_D16,
- SBUFFER_LOAD,
- SBUFFER_LOAD_BYTE,
- SBUFFER_LOAD_UBYTE,
- SBUFFER_LOAD_SHORT,
- SBUFFER_LOAD_USHORT,
- SBUFFER_PREFETCH_DATA,
- BUFFER_STORE,
- BUFFER_STORE_BYTE,
- BUFFER_STORE_SHORT,
- BUFFER_STORE_FORMAT,
- BUFFER_STORE_FORMAT_D16,
- BUFFER_ATOMIC_SWAP,
- BUFFER_ATOMIC_ADD,
- BUFFER_ATOMIC_SUB,
- BUFFER_ATOMIC_SMIN,
- BUFFER_ATOMIC_UMIN,
- BUFFER_ATOMIC_SMAX,
- BUFFER_ATOMIC_UMAX,
- BUFFER_ATOMIC_AND,
- BUFFER_ATOMIC_OR,
- BUFFER_ATOMIC_XOR,
- BUFFER_ATOMIC_INC,
- BUFFER_ATOMIC_DEC,
- BUFFER_ATOMIC_CMPSWAP,
- BUFFER_ATOMIC_CSUB,
- BUFFER_ATOMIC_FADD,
- BUFFER_ATOMIC_FMIN,
- BUFFER_ATOMIC_FMAX,
- BUFFER_ATOMIC_COND_SUB_U32,
- LAST_MEMORY_OPCODE = BUFFER_ATOMIC_COND_SUB_U32,
-
- // Set up a whole wave function.
- WHOLE_WAVE_SETUP,
-
- // Return from a whole wave function.
- WHOLE_WAVE_RETURN,
-};
-
-} // End namespace AMDGPUISD
-
} // End namespace llvm
#endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index b8fa6f3fc6867..8a43c2da38346 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -62,6 +62,7 @@ def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
// AMDGPU DAG Nodes
//
+// Masked control flow nodes.
def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
@@ -114,6 +115,7 @@ def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
[SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPOptInGlue]
>;
+// Pointer to the start of the shader's constant data.
def AMDGPUconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
SDTCisVT<0, iPTR>]>
@@ -122,18 +124,21 @@ def AMDGPUconstdata_ptr : SDNode<
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
+// Denormals handled on some parts.
def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
+
// out = a - floor(a)
def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
// out = 1.0 / a
def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
-// v_log_f32, which is log2
+// v_log_f32, which is log2, no denormal handling for f32.
def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;
-// v_exp_f32, which is exp2
+// v_exp_f32, which is exp2, no denormal handling for f32.
def AMDGPUexp_impl : SDNode<"AMDGPUISD::EXP", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a)
@@ -146,11 +151,16 @@ def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
+// Convert two float 32 numbers into a single register holding two packed f16
+// with round to zero.
def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
def AMDGPUpk_i16_i32_impl : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
+
+// Same as the standard node, except the high bits of the resulting integer
+// are known 0.
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
@@ -225,14 +235,18 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
]>;
+// This is SETCC with the full mask result which is used for a compare with a
+// result bit per item in the wavefront.
def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
+// FP ops with input and output chain.
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp, [
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+// These cvt_f32_ubyte* nodes need to remain consecutive and in order.
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
@@ -264,6 +278,8 @@ def AMDGPUdiv_fmas_impl : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+// For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
+// treated as an illegal operation.
def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
@@ -290,14 +306,23 @@ def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
+// Extract range of bits with zero extension to 32-bits.
def AMDGPUbfe_u32_impl : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
+
+// Extract range of bits with sign extension to 32-bits.
def AMDGPUbfe_i32_impl : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
+
+// (src0 & src1) | (~src0 & src2)
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
+
+// Insert a range of bits into a 32-bit word.
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
+// ctlz with -1 if input is zero.
def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
+// cttz with -1 if input is zero.
def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
// Signed and unsigned 24-bit multiply. The highest 8-bits are ignore
@@ -394,16 +419,24 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
+
+// A uniform kernel return that terminates the wavefront.
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
+
+// s_endpgm, but we may want to insert it in the middle of the block.
def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
[SDNPHasChain]>;
+
+// "s_trap 2" equivalent on hardware that does not support it.
def AMDGPUsimulated_trap : SDNode<"AMDGPUISD::SIMULATED_TRAP", SDTNone,
[SDNPHasChain]>;
+// Return to a shader part's epilog code.
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+// Return with values from a non-entry function.
def AMDGPUret_glue : SDNode<"AMDGPUISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp
index 2941a48c78d94..46e8217987574 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp
@@ -7,13 +7,38 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSelectionDAGInfo.h"
-#include "AMDGPUISelLowering.h"
+
+#define GET_SDNODE_DESC
+#include "AMDGPUGenSDNodeInfo.inc"
using namespace llvm;
+AMDGPUSelectionDAGInfo::AMDGPUSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(AMDGPUGenSDNodeInfo) {}
+
AMDGPUSelectionDAGInfo::~AMDGPUSelectionDAGInfo() = default;
-bool AMDGPUSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= AMDGPUISD::FIRST_MEMORY_OPCODE &&
- Opcode <= AMDGPUISD::LAST_MEMORY_OPCODE;
+const char *AMDGPUSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define NODE_NAME_CASE(node) \
+ case AMDGPUISD::node: \
+ return "AMDGPUISD::" #node;
+
+ switch (static_cast<AMDGPUISD::NodeType>(Opcode)) {
+ // These nodes don't have corresponding entries in *.td files yet.
+ NODE_NAME_CASE(WAVE_ADDRESS)
+ NODE_NAME_CASE(MAD_I64_I32)
+ NODE_NAME_CASE(MAD_U64_U32)
+ NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
+ // These do, but only when compiling R600.td,
+ // and the enum is generated from AMDGPU.td.
+ NODE_NAME_CASE(DOT4)
+ NODE_NAME_CASE(TEXTURE_FETCH)
+ ...
[truncated]
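For readers unfamiliar with the SDNodeInfo emitter, the hookup pattern the patch follows is sketched below. The .cpp side mirrors the AMDGPUSelectionDAGInfo.cpp hunk above; the header side (including the GET_SDNODE_ENUM macro) is an assumption by analogy with other targets that already use the emitter and is not visible in the truncated diff.

```cpp
// Sketch only: the header shape and GET_SDNODE_ENUM are assumptions;
// the .cpp shape is taken from the AMDGPUSelectionDAGInfo.cpp hunk above.

// AMDGPUSelectionDAGInfo.h (assumed):
//   The TableGen-generated enum replaces the hand-written AMDGPUISD::NodeType.
// #define GET_SDNODE_ENUM
// #include "AMDGPUGenSDNodeInfo.inc"
//
// class AMDGPUSelectionDAGInfo : public SelectionDAGGenTargetInfo {
// public:
//   AMDGPUSelectionDAGInfo();
//   ~AMDGPUSelectionDAGInfo() override;
//   // Kept only for nodes that have no .td definition yet.
//   const char *getTargetNodeName(unsigned Opcode) const override;
// };

// AMDGPUSelectionDAGInfo.cpp (as in the hunk above):
#define GET_SDNODE_DESC
#include "AMDGPUGenSDNodeInfo.inc" // node names, type profiles, and flags

using namespace llvm;

AMDGPUSelectionDAGInfo::AMDGPUSelectionDAGInfo()
    : SelectionDAGGenTargetInfo(AMDGPUGenSDNodeInfo) {}
```

With this in place, node-name printing and the type-profile checks come from the generated descriptions rather than the hand-maintained enum and NODE_NAME_CASE switch that the patch deletes.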