diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 6068035b2ee47..18aeda6a7935a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1027,6 +1027,64 @@ pickOpcodeForVT(MVT::SimpleValueType VT, std::optional Opcode_i16, } } +static inline bool isAddLike(const SDValue V) { + return V.getOpcode() == ISD::ADD || + (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint()); +} + +// selectBaseADDR - Match a dag node which will serve as the base address for an +// ADDR operand pair. +static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) { + if (const auto *GA = dyn_cast(N)) + return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), + GA->getValueType(0), GA->getOffset(), + GA->getTargetFlags()); + if (const auto *ES = dyn_cast(N)) + return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), + ES->getTargetFlags()); + if (const auto *FIN = dyn_cast(N)) + return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0)); + + return N; +} + +static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { + APInt AccumulatedOffset(64u, 0); + while (isAddLike(Addr)) { + const auto *CN = dyn_cast(Addr.getOperand(1)); + if (!CN) + break; + + const APInt CI = CN->getAPIntValue().sext(64); + if (!(CI + AccumulatedOffset).isSignedIntN(32)) + break; + + AccumulatedOffset += CI; + Addr = Addr->getOperand(0); + } + return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL, + MVT::i32); +} + +static std::pair selectADDR(SDValue Addr, SelectionDAG *DAG) { + SDValue Offset = accumulateOffset(Addr, SDLoc(Addr), DAG); + SDValue Base = selectBaseADDR(Addr, DAG); + return {Base, Offset}; +} + +// Select a pair of operands which represent a valid PTX address, this could be +// one of the following things: +// - [var] - Offset is simply set to 0 +// - [reg] - Offset is simply set to 0 +// - [reg+immOff] +// - [var+immOff] +// Note that immOff must fit into a 32-bit signed integer. +bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base, + SDValue &Offset) { + std::tie(Base, Offset) = selectADDR(Addr, CurDAG); + return true; +} + bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { MemSDNode *LD = cast(N); assert(LD->readMem() && "Expected load"); @@ -1062,8 +1120,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { FromTypeWidth <= 128 && "Invalid width for load"); // Create the machine instruction DAG - SDValue Offset, Base; - SelectADDR(N->getOperand(1), Base, Offset); + const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG); SDValue Ops[] = {getI32Imm(Ordering, DL), getI32Imm(Scope, DL), getI32Imm(CodeAddrSpace, DL), @@ -1144,8 +1201,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 && FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load"); - SDValue Offset, Base; - SelectADDR(N->getOperand(1), Base, Offset); + const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG); SDValue Ops[] = {getI32Imm(Ordering, DL), getI32Imm(Scope, DL), getI32Imm(CodeAddrSpace, DL), @@ -1213,8 +1269,7 @@ bool NVPTXDAGToDAGISel::tryLDG(MemSDNode *LD) { assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 && FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load"); - SDValue Base, Offset; - SelectADDR(LD->getOperand(1), Base, Offset); + const auto [Base, Offset] = selectADDR(LD->getOperand(1), CurDAG); SDValue Ops[] = {getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base, Offset, LD->getChain()}; @@ -1278,8 +1333,7 @@ bool NVPTXDAGToDAGISel::tryLDU(SDNode *N) { SDValue Addr = LD->getOperand(LD->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 2 : 1); - SDValue Base, Offset; - SelectADDR(Addr, Base, Offset); + const auto [Base, Offset] = selectADDR(Addr, CurDAG); SDValue Ops[] = {getI32Imm(FromTypeWidth, DL), Base, Offset, LD->getChain()}; std::optional Opcode; @@ -1339,9 +1393,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 && "Invalid width for store"); - SDValue Offset, Base; - SelectADDR(ST->getBasePtr(), Base, Offset); - + const auto [Base, Offset] = selectADDR(ST->getBasePtr(), CurDAG); SDValue Ops[] = {selectPossiblyImm(Value), getI32Imm(Ordering, DL), getI32Imm(Scope, DL), @@ -1399,9 +1451,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for store"); - SDValue Offset, Base; - SelectADDR(Addr, Base, Offset); - + const auto [Base, Offset] = selectADDR(Addr, CurDAG); Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base, Offset, Chain}); @@ -1708,59 +1758,6 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) { return true; } -static inline bool isAddLike(const SDValue V) { - return V.getOpcode() == ISD::ADD || - (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint()); -} - -// selectBaseADDR - Match a dag node which will serve as the base address for an -// ADDR operand pair. -static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) { - if (const auto *GA = dyn_cast(N)) - return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), - GA->getValueType(0), GA->getOffset(), - GA->getTargetFlags()); - if (const auto *ES = dyn_cast(N)) - return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), - ES->getTargetFlags()); - if (const auto *FIN = dyn_cast(N)) - return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0)); - - return N; -} - -static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) { - APInt AccumulatedOffset(64u, 0); - while (isAddLike(Addr)) { - const auto *CN = dyn_cast(Addr.getOperand(1)); - if (!CN) - break; - - const APInt CI = CN->getAPIntValue().sext(64); - if (!(CI + AccumulatedOffset).isSignedIntN(32)) - break; - - AccumulatedOffset += CI; - Addr = Addr->getOperand(0); - } - return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL, - MVT::i32); -} - -// Select a pair of operands which represent a valid PTX address, this could be -// one of the following things: -// - [var] - Offset is simply set to 0 -// - [reg] - Offset is simply set to 0 -// - [reg+immOff] -// - [var+immOff] -// Note that immOff must fit into a 32-bit signed integer. -bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base, - SDValue &Offset) { - Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG); - Base = selectBaseADDR(Addr, CurDAG); - return true; -} - SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) { if (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); @@ -1774,37 +1771,20 @@ SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) { return V; } -bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, - unsigned int spN) const { - const Value *Src = nullptr; - if (MemSDNode *mN = dyn_cast(N)) { - if (spN == 0 && mN->getMemOperand()->getPseudoValue()) - return true; - Src = mN->getMemOperand()->getValue(); - } - if (!Src) - return false; - if (auto *PT = dyn_cast(Src->getType())) - return (PT->getAddressSpace() == spN); - return false; -} - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector &OutOps) { - SDValue Op0, Op1; switch (ConstraintID) { default: return true; - case InlineAsm::ConstraintCode::m: // memory - if (SelectADDR(Op, Op0, Op1)) { - OutOps.push_back(Op0); - OutOps.push_back(Op1); - return false; - } - break; + case InlineAsm::ConstraintCode::m: { // memory + const auto [Base, Offset] = selectADDR(Op, CurDAG); + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } } return true; } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 9e0f88e544980..357e915fd077e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -102,8 +102,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { SDValue getPTXCmpMode(const CondCodeSDNode &CondCode); SDValue selectPossiblyImm(SDValue V); - bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; - // Returns the Memory Order and Scope that the PTX memory instruction should // use, and inserts appropriate fence instruction before the memory // instruction, if needed to implement the instructions memory order. Required diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 1ab41bf9a3312..bd54d1db9156f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -148,13 +148,16 @@ class OneUse2 : PatFrag<(ops node:$A, node:$B), (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>; -class fpimm_pos_inf - : FPImmLeaf; - class zeroinitializer : PatLeaf<(vt (bitconvert (!cast("i" # vt.Size) 0)))>; +def fpimm_pos_inf : FPImmLeaf; +def fpimm_0 : FPImmLeaf; +def fpimm_1 : FPImmLeaf; +def fpimm_neg_1 : FPImmLeaf; + + // Operands which can hold a Register or an Immediate. // // Unfortunately, since most register classes can hold multiple types, we must @@ -268,7 +271,7 @@ multiclass I3Inst" (e.g. "add.s64"). multiclass I3 { foreach t = [I16RT, I32RT, I64RT] in - defm t.Ty# : I3Inst; + defm t.Size# : I3Inst; } class I16x2 : @@ -761,10 +764,10 @@ def fabs_oneuse : OneUse1; def TESTINF_f32r : BasicNVPTXInst<(outs B1:$p), (ins B32:$a), "testp.infinite.f32", - [(set i1:$p, (seteq (fabs_oneuse f32:$a), fpimm_pos_inf))]>; + [(set i1:$p, (seteq (fabs_oneuse f32:$a), fpimm_pos_inf))]>; def TESTINF_f64r : BasicNVPTXInst<(outs B1:$p), (ins B64:$a), "testp.infinite.f64", - [(set i1:$p, (seteq (fabs_oneuse f64:$a), fpimm_pos_inf))]>; + [(set i1:$p, (seteq (fabs_oneuse f64:$a), fpimm_pos_inf))]>; //----------------------------------- // Integer Arithmetic @@ -787,8 +790,8 @@ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>; defm MULT : I3<"mul.lo.s", mul, commutative = true>; -defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>; -defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>; +defm MUL_HI_S : I3<"mul.hi.s", mulhs, commutative = true>; +defm MUL_HI_U : I3<"mul.hi.u", mulhu, commutative = true>; defm SDIV : I3<"div.s", sdiv, commutative = false>; defm UDIV : I3<"div.u", udiv, commutative = false>; @@ -905,22 +908,6 @@ let Predicates = [hasOptEnabled] in { // Floating Point Arithmetic //----------------------------------- -// Constant 1.0f -def f32imm_1 : FPImmLeaf; -// Constant 1.0 (double) -def f64imm_1 : FPImmLeaf; -// Constant -1.0 (double) -def f64imm_neg1 : FPImmLeaf; - defm FADD : F3_fma_component<"add", fadd>; defm FSUB : F3_fma_component<"sub", fsub>; defm FMUL : F3_fma_component<"mul", fmul>; @@ -994,7 +981,7 @@ def FRCP64r : BasicNVPTXInst<(outs B64:$dst), (ins B64:$b), "rcp.rn.f64", - [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>; + [(set f64:$dst, (fdiv fpimm_1, f64:$b))]>; def FDIV64rr : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a, B64:$b), @@ -1008,7 +995,7 @@ def FDIV64ri : // fdiv will be converted to rcp // fneg (fdiv 1.0, X) => fneg (rcp.rn X) -def : Pat<(fdiv f64imm_neg1, f64:$b), +def : Pat<(fdiv fpimm_neg_1, f64:$b), (FNEGf64 (FRCP64r $b))>; // @@ -1021,21 +1008,21 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b), }]>; -def FRCP32_approx_r : +def RCP_APPROX_F32_r : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$b), (ins FTZFlag:$ftz), "rcp.approx$ftz.f32", - [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>; + [(set f32:$dst, (fdiv_approx fpimm_1, f32:$b))]>; // // F32 Approximate division // -def FDIV32_approx_rr : +def DIV_APPROX_F32_rr : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), (ins FTZFlag:$ftz), "div.approx$ftz.f32", [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>; -def FDIV32_approx_ri : +def DIV_APPROX_F32_ri : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, f32imm:$b), (ins FTZFlag:$ftz), "div.approx$ftz.f32", @@ -1052,8 +1039,8 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b), }]>; -def : Pat<(fdiv_full f32imm_1, f32:$b), - (FRCP32_approx_r $b)>; +def : Pat<(fdiv_full fpimm_1, f32:$b), + (RCP_APPROX_F32_r $b)>; // // F32 Semi-accurate division @@ -1081,7 +1068,7 @@ def FRCP32r_prec : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$b), (ins FTZFlag:$ftz), "rcp.rn$ftz.f32", - [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>; + [(set f32:$dst, (fdiv_ftz fpimm_1, f32:$b))]>; // // F32 Accurate division // @@ -1096,7 +1083,7 @@ def FDIV32ri_prec : "div.rn$ftz.f32", [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>; -def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>; +def : Pat<(fdiv fpimm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>; def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>; def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>; @@ -1519,9 +1506,9 @@ def MmaCode : Operand { // Get pointer to local stack. let hasSideEffects = false in { def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num), - "mov.b32 \t$d, __local_depot$num;", []>; + "mov.b32 \t$d, __local_depot$num;">; def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num), - "mov.b64 \t$d, __local_depot$num;", []>; + "mov.b64 \t$d, __local_depot$num;">; } @@ -1577,9 +1564,9 @@ def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>; //---- Copy Frame Index ---- def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr), - "add.u32 \t$dst, ${addr:add};", []>; + "add.u32 \t$dst, ${addr:add};">; def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr), - "add.u64 \t$dst, ${addr:add};", []>; + "add.u64 \t$dst, ${addr:add};">; def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>; def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>; @@ -1644,12 +1631,12 @@ foreach is_convergent = [0, 1] in { NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, i32imm:$proto), - "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>; + "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;">; def CALL_UNI # convergent_suffix : NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params), - "call.uni${rets:RetList} $addr, (${params:ParamList});", []>; + "call.uni${rets:RetList} $addr, (${params:ParamList});">; } defvar call_inst = !cast("CALL" # convergent_suffix); @@ -1665,10 +1652,10 @@ foreach is_convergent = [0, 1] in { def DECLARE_PARAM_array : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size), - ".param .align $align .b8 \t$a[$size];", []>; + ".param .align $align .b8 \t$a[$size];">; def DECLARE_PARAM_scalar : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), - ".param .b$size \t$a;", []>; + ".param .b$size \t$a;">; def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size), (DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>; @@ -1741,7 +1728,7 @@ class LD (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth " - "\t$dst, [$addr];", []>; + "\t$dst, [$addr];">; let mayLoad=1, hasSideEffects=0 in { def LD_i16 : LD; @@ -1756,7 +1743,7 @@ class ST AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth, ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth" - " \t[$addr], $src;", []>; + " \t[$addr], $src;">; let mayStore=1, hasSideEffects=0 in { def ST_i16 : ST; @@ -1773,13 +1760,13 @@ multiclass LD_VEC { (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr];", []>; + "\t{{$dst1, $dst2}}, [$addr];">; def _v4 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];">; if support_v8 then def _v8 : NVPTXInst< (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4, @@ -1788,7 +1775,7 @@ multiclass LD_VEC { i32imm:$fromWidth, ADDR:$addr), "ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth " "\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, " - "[$addr];", []>; + "[$addr];">; } let mayLoad=1, hasSideEffects=0 in { defm LDV_i16 : LD_VEC; @@ -1803,14 +1790,14 @@ multiclass ST_VEC { AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth, ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth " - "\t[$addr], {{$src1, $src2}};", []>; + "\t[$addr], {{$src1, $src2}};">; def _v4 : NVPTXInst< (outs), (ins O:$src1, O:$src2, O:$src3, O:$src4, AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth, ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth " - "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; + "\t[$addr], {{$src1, $src2, $src3, $src4}};">; if support_v8 then def _v8 : NVPTXInst< (outs), @@ -1820,7 +1807,7 @@ multiclass ST_VEC { ADDR:$addr), "st${sem:sem}${scope:scope}${addsp:addsp}.v8.b$fromWidth " "\t[$addr], " - "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};", []>; + "{{$src1, $src2, $src3, $src4, $src5, $src6, $src7, $src8}};">; } let mayStore=1, hasSideEffects=0 in { @@ -2015,60 +2002,52 @@ let hasSideEffects = false in { def V4I16toI64 : NVPTXInst<(outs B64:$d), (ins B16:$s1, B16:$s2, B16:$s3, B16:$s4), - "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; + "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};">; def V2I16toI32 : NVPTXInst<(outs B32:$d), (ins B16:$s1, B16:$s2), - "mov.b32 \t$d, {{$s1, $s2}};", []>; + "mov.b32 \t$d, {{$s1, $s2}};">; def V2I32toI64 : NVPTXInst<(outs B64:$d), (ins B32:$s1, B32:$s2), - "mov.b64 \t$d, {{$s1, $s2}};", []>; + "mov.b64 \t$d, {{$s1, $s2}};">; def V2I64toI128 : NVPTXInst<(outs B128:$d), (ins B64:$s1, B64:$s2), - "mov.b128 \t$d, {{$s1, $s2}};", []>; + "mov.b128 \t$d, {{$s1, $s2}};">; // unpack a larger int register to a set of smaller int registers def I64toV4I16 : NVPTXInst<(outs B16:$d1, B16:$d2, B16:$d3, B16:$d4), (ins B64:$s), - "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; + "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;">; def I32toV2I16 : NVPTXInst<(outs B16:$d1, B16:$d2), (ins B32:$s), - "mov.b32 \t{{$d1, $d2}}, $s;", []>; + "mov.b32 \t{{$d1, $d2}}, $s;">; def I64toV2I32 : NVPTXInst<(outs B32:$d1, B32:$d2), (ins B64:$s), - "mov.b64 \t{{$d1, $d2}}, $s;", []>; + "mov.b64 \t{{$d1, $d2}}, $s;">; def I128toV2I64: NVPTXInst<(outs B64:$d1, B64:$d2), (ins B128:$s), - "mov.b128 \t{{$d1, $d2}}, $s;", []>; + "mov.b128 \t{{$d1, $d2}}, $s;">; - def I32toI16H : NVPTXInst<(outs B16:$high), - (ins B32:$s), - "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}", - []>; - def I32toI16L : NVPTXInst<(outs B16:$low), - (ins B32:$s), - "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}", - []>; - def I64toI32H : NVPTXInst<(outs B32:$high), - (ins B64:$s), - "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}", - []>; - def I64toI32L : NVPTXInst<(outs B32:$low), - (ins B64:$s), - "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}", - []>; + def I32toI16H : NVPTXInst<(outs B16:$high), (ins B32:$s), + "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}">; + def I32toI16L : NVPTXInst<(outs B16:$low), (ins B32:$s), + "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}">; + def I64toI32H : NVPTXInst<(outs B32:$high), (ins B64:$s), + "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}">; + def I64toI32L : NVPTXInst<(outs B32:$low), (ins B64:$s), + "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}">; // PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the // unused high/low part. let Predicates = [hasPTX<71>] in { def I32toI16H_Sink : NVPTXInst<(outs B16:$high), (ins B32:$s), - "mov.b32 \t{{_, $high}}, $s;", []>; + "mov.b32 \t{{_, $high}}, $s;">; def I32toI16L_Sink : NVPTXInst<(outs B16:$low), (ins B32:$s), - "mov.b32 \t{{$low, _}}, $s;", []>; + "mov.b32 \t{{$low, _}}, $s;">; def I64toI32H_Sink : NVPTXInst<(outs B32:$high), (ins B64:$s), - "mov.b64 \t{{_, $high}}, $s;", []>; + "mov.b64 \t{{_, $high}}, $s;">; def I64toI32L_Sink : NVPTXInst<(outs B32:$low), (ins B64:$s), - "mov.b64 \t{{$low, _}}, $s;", []>; + "mov.b64 \t{{$low, _}}, $s;">; } } @@ -2426,10 +2405,6 @@ foreach scope = ["sys", "gpu", "cluster", "cta"] in { def atomic_thread_fence_release_#scope: NVPTXFenceInst>; } -def fpimm_any_zero : FPImmLeaf; - // Perform substitution if fma only has one use, and also if instruction has // nnan instruction flag or if the TM has NoNaNsFPMath def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c), @@ -2451,11 +2426,11 @@ class FMARELUInst [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>; let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in { - def FMARELU_F16 : FMARELUInst; + def FMARELU_F16 : FMARELUInst; def FMARELU_F16X2 : FMARELUInst>; } let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in { - def FMARELU_BF16 : FMARELUInst; + def FMARELU_BF16 : FMARELUInst; def FMARELU_BF16X2 : FMARELUInst>; } diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index d4a0ca794cd88..721afae4db51c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -6,44 +6,24 @@ // //===----------------------------------------------------------------------===// -def immFloat0 : PatLeaf<(fpimm), [{ - float f = (float)N->getValueAPF().convertToFloat(); - return (f==0.0f); -}]>; - -def immFloat1 : PatLeaf<(fpimm), [{ - float f = (float)N->getValueAPF().convertToFloat(); - return (f==1.0f); -}]>; - -def immDouble0 : PatLeaf<(fpimm), [{ - double d = (double)N->getValueAPF().convertToDouble(); - return (d==0.0); -}]>; - -def immDouble1 : PatLeaf<(fpimm), [{ - double d = (double)N->getValueAPF().convertToDouble(); - return (d==1.0); -}]>; - def AS_match { code generic = [{ - return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC); + return cast(N)->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC; }]; code shared = [{ - return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED); + return cast(N)->getAddressSpace() == llvm::ADDRESS_SPACE_SHARED; }]; code shared_cluster = [{ - return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED_CLUSTER); + return cast(N)->getAddressSpace() == llvm::ADDRESS_SPACE_SHARED_CLUSTER; }]; code global = [{ - return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL); + return cast(N)->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL; }]; code const = [{ - return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_CONST); + return cast(N)->getAddressSpace() == llvm::ADDRESS_SPACE_CONST; }]; code param = [{ - return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_PARAM); + return cast(N)->getAddressSpace() == llvm::ADDRESS_SPACE_PARAM; }]; } @@ -659,22 +639,22 @@ multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR def "" : NVPTXInst<(outs), !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, (ins CTAGroupFlags:$cg)), - !strconcat(G2S_STRINGS.inst_name, asm_str, ";"), []>, + !strconcat(G2S_STRINGS.inst_name, asm_str, ";")>, Requires<[hasPTX<80>, hasSM<90>]>; def _MC : NVPTXInst<(outs), !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, (ins B16:$mc, CTAGroupFlags:$cg)), - !strconcat(G2S_STRINGS.inst_name, asm_str, ", $mc;"), []>, + !strconcat(G2S_STRINGS.inst_name, asm_str, ", $mc;")>, Requires<[hasPTX<80>, hasSM<90>]>; def _CH : NVPTXInst<(outs), !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, (ins B64:$ch, CTAGroupFlags:$cg)), - !strconcat(G2S_STRINGS.inst_name, asm_str, ", $ch;"), []>, + !strconcat(G2S_STRINGS.inst_name, asm_str, ", $ch;")>, Requires<[hasPTX<80>, hasSM<90>]>; def _MC_CH : NVPTXInst<(outs), !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, (ins B16:$mc, B64:$ch, CTAGroupFlags:$cg)), - !strconcat(G2S_STRINGS.inst_name, asm_str, ", $mc, $ch;"), []>, + !strconcat(G2S_STRINGS.inst_name, asm_str, ", $mc, $ch;")>, Requires<[hasPTX<80>, hasSM<90>]>; } @@ -876,11 +856,11 @@ multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR def "" : NVPTXInst<(outs), !con((ins rc:$src, B64:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)), - !strconcat(prefix, "${red_op}", suffix, asm_str, ";"), []>, + !strconcat(prefix, "${red_op}", suffix, asm_str, ";")>, Requires<[hasPTX<80>, hasSM<90>]>; def _CH : NVPTXInst<(outs), !con((ins rc:$src, B64:$tmap), dims_dag, (ins B64:$ch, TMAReductionFlags:$red_op)), - !strconcat(prefix, "${red_op}", suffix, ".L2::cache_hint", asm_str, ", $ch;"), []>, + !strconcat(prefix, "${red_op}", suffix, ".L2::cache_hint", asm_str, ", $ch;")>, Requires<[hasPTX<80>, hasSM<90>]>; } @@ -1112,30 +1092,30 @@ let Predicates = [hasPTX<70>, hasSM<80>] in { // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0. // Same story for fmax, fmin. -def : Pat<(int_nvvm_fmin_f immFloat1, - (int_nvvm_fmax_f immFloat0, f32:$a)), +def : Pat<(int_nvvm_fmin_f fpimm_1, + (int_nvvm_fmax_f fpimm_0, f32:$a)), (CVT_f32_f32 $a, CvtSAT)>; -def : Pat<(int_nvvm_fmin_f immFloat1, - (int_nvvm_fmax_f f32:$a, immFloat0)), +def : Pat<(int_nvvm_fmin_f fpimm_1, + (int_nvvm_fmax_f f32:$a, fpimm_0)), (CVT_f32_f32 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f - (int_nvvm_fmax_f immFloat0, f32:$a), immFloat1), + (int_nvvm_fmax_f fpimm_0, f32:$a), fpimm_1), (CVT_f32_f32 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f - (int_nvvm_fmax_f f32:$a, immFloat0), immFloat1), + (int_nvvm_fmax_f f32:$a, fpimm_0), fpimm_1), (CVT_f32_f32 $a, CvtSAT)>; -def : Pat<(int_nvvm_fmin_d immDouble1, - (int_nvvm_fmax_d immDouble0, f64:$a)), +def : Pat<(int_nvvm_fmin_d fpimm_1, + (int_nvvm_fmax_d fpimm_0, f64:$a)), (CVT_f64_f64 $a, CvtSAT)>; -def : Pat<(int_nvvm_fmin_d immDouble1, - (int_nvvm_fmax_d f64:$a, immDouble0)), +def : Pat<(int_nvvm_fmin_d fpimm_1, + (int_nvvm_fmax_d f64:$a, fpimm_0)), (CVT_f64_f64 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d - (int_nvvm_fmax_d immDouble0, f64:$a), immDouble1), + (int_nvvm_fmax_d fpimm_0, f64:$a), fpimm_1), (CVT_f64_f64 $a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d - (int_nvvm_fmax_d f64:$a, immDouble0), immDouble1), + (int_nvvm_fmax_d f64:$a, fpimm_0), fpimm_1), (CVT_f64_f64 $a, CvtSAT)>; @@ -1329,12 +1309,12 @@ defm INT_NVVM_FMAN : MIN_MAX<"max">; // Multiplication // -def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16", B16, B16, B16, int_nvvm_mulhi_s>; -def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16", B16, B16, B16, int_nvvm_mulhi_us>; -def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32", B32, B32, B32, int_nvvm_mulhi_i>; -def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32", B32, B32, B32, int_nvvm_mulhi_ui>; -def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64", B64, B64, B64, int_nvvm_mulhi_ll>; -def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64", B64, B64, B64, int_nvvm_mulhi_ull>; +def : Pat<(int_nvvm_mulhi_s i16:$a, i16:$b), (MUL_HI_S16rr $a, $b)>; +def : Pat<(int_nvvm_mulhi_us i16:$a, i16:$b), (MUL_HI_U16rr $a, $b)>; +def : Pat<(int_nvvm_mulhi_i i32:$a, i32:$b), (MUL_HI_S32rr $a, $b)>; +def : Pat<(int_nvvm_mulhi_ui i32:$a, i32:$b), (MUL_HI_U32rr $a, $b)>; +def : Pat<(int_nvvm_mulhi_ll i64:$a, i64:$b), (MUL_HI_S64rr $a, $b)>; +def : Pat<(int_nvvm_mulhi_ull i64:$a, i64:$b), (MUL_HI_U64rr $a, $b)>; def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32", B32, B32, B32, int_nvvm_mul_rn_ftz_f>; def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32", B32, B32, B32, int_nvvm_mul_rn_f>; @@ -1357,8 +1337,8 @@ def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32", B32, B32, B32, int_nvvm_mul24_u // Div // -def INT_NVVM_DIV_APPROX_FTZ_F : F_MATH_2<"div.approx.ftz.f32", B32, B32, B32, int_nvvm_div_approx_ftz_f>; -def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32", B32, B32, B32, int_nvvm_div_approx_f>; +def : Pat<(int_nvvm_div_approx_ftz_f f32:$a, f32:$b), (DIV_APPROX_F32_rr $a, $b, FTZ)>; +def : Pat<(int_nvvm_div_approx_f f32:$a, f32:$b), (DIV_APPROX_F32_rr $a, $b, NoFTZ)>; def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32", B32, B32, B32, int_nvvm_div_rn_ftz_f>; def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32", B32, B32, B32, int_nvvm_div_rn_f>; @@ -1663,13 +1643,13 @@ def : Pat<(int_nvvm_rsqrt_approx_d f64:$a), (RSQRT_APPROX_f64 $a, NoFTZ)>; // 1.0f / sqrt_approx -> rsqrt_approx let Predicates = [doRsqrtOpt] in { - def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)), + def : Pat<(fdiv fpimm_1, (int_nvvm_sqrt_approx_f f32:$a)), (RSQRT_APPROX_f32 $a, NoFTZ)>; - def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)), + def : Pat<(fdiv fpimm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)), (RSQRT_APPROX_f32 $a, FTZ)>; // same for int_nvvm_sqrt_f when non-precision sqrt is requested - def : Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)), + def : Pat<(fdiv fpimm_1, (fsqrt_approx f32:$a)), (RSQRT_APPROX_f32 $a)>; } // @@ -2231,7 +2211,7 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">; class LDU_G : NVPTXInst<(outs regclass:$result), (ins i32imm:$fromWidth, ADDR:$src), - "ldu.global.b$fromWidth \t$result, [$src];", []>; + "ldu.global.b$fromWidth \t$result, [$src];">; def LDU_GLOBAL_i16 : LDU_G; def LDU_GLOBAL_i32 : LDU_G; @@ -2243,13 +2223,13 @@ def LDU_GLOBAL_i64 : LDU_G; class VLDU_G_ELE_V2 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins i32imm:$fromWidth, ADDR:$src), - "ldu.global.v2.b$fromWidth \t{{$dst1, $dst2}}, [$src];", []>; + "ldu.global.v2.b$fromWidth \t{{$dst1, $dst2}}, [$src];">; class VLDU_G_ELE_V4 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins i32imm:$fromWidth, ADDR:$src), - "ldu.global.v4.b$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; + "ldu.global.v4.b$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];">; def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2; @@ -2270,9 +2250,8 @@ def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4; class LDG_G : NVPTXInst<(outs regclass:$result), (ins AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$src), - "ld.global.nc.${Sign:sign}$fromWidth \t$result, [$src];", []>; + "ld.global.nc.${Sign:sign}$fromWidth \t$result, [$src];">; -def LD_GLOBAL_NC_i8 : LDG_G; def LD_GLOBAL_NC_i16 : LDG_G; def LD_GLOBAL_NC_i32 : LDG_G; def LD_GLOBAL_NC_i64 : LDG_G; @@ -2283,19 +2262,19 @@ def LD_GLOBAL_NC_i64 : LDG_G; class VLDG_G_ELE_V2 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$src), - "ld.global.nc.v2.${Sign:sign}$fromWidth \t{{$dst1, $dst2}}, [$src];", []>; + "ld.global.nc.v2.${Sign:sign}$fromWidth \t{{$dst1, $dst2}}, [$src];">; class VLDG_G_ELE_V4 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$src), - "ld.global.nc.v4.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; + "ld.global.nc.v4.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];">; class VLDG_G_ELE_V8 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4, regclass:$dst5, regclass:$dst6, regclass:$dst7, regclass:$dst8), (ins AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$src), - "ld.global.nc.v8.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>; + "ld.global.nc.v8.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];">; // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2; @@ -3540,20 +3519,13 @@ multiclass SULD_1D { def _I : SULD_1D_base; } -defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", B16>; -defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", B16>; -defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", B32>; -defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", B64>; - -defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", B16>; -defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", B16>; -defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", B32>; -defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", B64>; - -defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", B16>; -defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", B16>; -defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", B32>; -defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_1D_I8_ # op_upper : SULD_1D<"suld.b.1d.b8." # op, B16>; + defm SULD_1D_I16_ # op_upper : SULD_1D<"suld.b.1d.b16." # op, B16>; + defm SULD_1D_I32_ # op_upper : SULD_1D<"suld.b.1d.b32." # op, B32>; + defm SULD_1D_I64_ # op_upper : SULD_1D<"suld.b.1d.b64." # op, B64>; +} class SULD_1D_ARRAY_base pattern = []> @@ -3570,20 +3542,13 @@ multiclass SULD_1D_ARRAY { def _I : SULD_1D_ARRAY_base; } -defm SULD_1D_ARRAY_I8_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", B16>; -defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", B16>; -defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", B32>; -defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", B64>; - -defm SULD_1D_ARRAY_I8_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", B16>; -defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", B16>; -defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", B32>; -defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", B64>; - -defm SULD_1D_ARRAY_I8_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", B16>; -defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", B16>; -defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", B32>; -defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_1D_ARRAY_I8_ # op_upper : SULD_1D_ARRAY<"suld.b.a1d.b8." # op, B16>; + defm SULD_1D_ARRAY_I16_ # op_upper : SULD_1D_ARRAY<"suld.b.a1d.b16." # op, B16>; + defm SULD_1D_ARRAY_I32_ # op_upper : SULD_1D_ARRAY<"suld.b.a1d.b32." # op, B32>; + defm SULD_1D_ARRAY_I64_ # op_upper : SULD_1D_ARRAY<"suld.b.a1d.b64." # op, B64>; +} class SULD_2D_base pattern = []> @@ -3599,20 +3564,13 @@ multiclass SULD_2D { def _I : SULD_2D_base; } -defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", B16>; -defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", B16>; -defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", B32>; -defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", B64>; - -defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", B16>; -defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", B16>; -defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", B32>; -defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", B64>; - -defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", B16>; -defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", B16>; -defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", B32>; -defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_2D_I8_ # op_upper : SULD_2D<"suld.b.2d.b8." # op, B16>; + defm SULD_2D_I16_ # op_upper : SULD_2D<"suld.b.2d.b16." # op, B16>; + defm SULD_2D_I32_ # op_upper : SULD_2D<"suld.b.2d.b32." # op, B32>; + defm SULD_2D_I64_ # op_upper : SULD_2D<"suld.b.2d.b64." # op, B64>; +} class SULD_2D_ARRAY_base pattern = []> @@ -3629,20 +3587,13 @@ multiclass SULD_2D_ARRAY { def _I : SULD_2D_ARRAY_base; } -defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", B16>; -defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", B16>; -defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", B32>; -defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", B64>; - -defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", B16>; -defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", B16>; -defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", B32>; -defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", B64>; - -defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", B16>; -defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", B16>; -defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", B32>; -defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_2D_ARRAY_I8_ # op_upper : SULD_2D_ARRAY<"suld.b.a2d.b8." # op, B16>; + defm SULD_2D_ARRAY_I16_ # op_upper : SULD_2D_ARRAY<"suld.b.a2d.b16." # op, B16>; + defm SULD_2D_ARRAY_I32_ # op_upper : SULD_2D_ARRAY<"suld.b.a2d.b32." # op, B32>; + defm SULD_2D_ARRAY_I64_ # op_upper : SULD_2D_ARRAY<"suld.b.a2d.b64." # op, B64>; +} class SULD_3D_base pattern = []> @@ -3659,20 +3610,13 @@ multiclass SULD_3D { def _I : SULD_3D_base; } -defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", B16>; -defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", B16>; -defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", B32>; -defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", B64>; - -defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", B16>; -defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", B16>; -defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", B32>; -defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", B64>; - -defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", B16>; -defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", B16>; -defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", B32>; -defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_3D_I8_ # op_upper : SULD_3D<"suld.b.3d.b8." # op, B16>; + defm SULD_3D_I16_ # op_upper : SULD_3D<"suld.b.3d.b16." # op, B16>; + defm SULD_3D_I32_ # op_upper : SULD_3D<"suld.b.3d.b32." # op, B32>; + defm SULD_3D_I64_ # op_upper : SULD_3D<"suld.b.3d.b64." # op, B64>; +} } let IsSuld = 2 in { @@ -3692,20 +3636,13 @@ multiclass SULD_1D_V2 { def _I : SULD_1D_V2_base; } -defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", B16>; -defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", B16>; -defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", B32>; -defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", B64>; - -defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", B16>; -defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", B16>; -defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", B32>; -defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", B64>; - -defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", B16>; -defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", B16>; -defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", B32>; -defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_1D_V2I8_ # op_upper : SULD_1D_V2<"suld.b.1d.v2.b8." # op, B16>; + defm SULD_1D_V2I16_ # op_upper : SULD_1D_V2<"suld.b.1d.v2.b16." # op, B16>; + defm SULD_1D_V2I32_ # op_upper : SULD_1D_V2<"suld.b.1d.v2.b32." # op, B32>; + defm SULD_1D_V2I64_ # op_upper : SULD_1D_V2<"suld.b.1d.v2.b64." # op, B64>; +} class SULD_1D_ARRAY_V2_base pattern = []> @@ -3722,20 +3659,13 @@ multiclass SULD_1D_ARRAY_V2 { def _I : SULD_1D_ARRAY_V2_base; } -defm SULD_1D_ARRAY_V2I8_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", B16>; -defm SULD_1D_ARRAY_V2I16_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", B16>; -defm SULD_1D_ARRAY_V2I32_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", B32>; -defm SULD_1D_ARRAY_V2I64_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", B64>; - -defm SULD_1D_ARRAY_V2I8_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", B16>; -defm SULD_1D_ARRAY_V2I16_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", B16>; -defm SULD_1D_ARRAY_V2I32_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", B32>; -defm SULD_1D_ARRAY_V2I64_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", B64>; - -defm SULD_1D_ARRAY_V2I8_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", B16>; -defm SULD_1D_ARRAY_V2I16_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", B16>; -defm SULD_1D_ARRAY_V2I32_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", B32>; -defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_1D_ARRAY_V2I8_ # op_upper : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8." # op, B16>; + defm SULD_1D_ARRAY_V2I16_ # op_upper : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16." # op, B16>; + defm SULD_1D_ARRAY_V2I32_ # op_upper : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32." # op, B32>; + defm SULD_1D_ARRAY_V2I64_ # op_upper : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64." # op, B64>; +} class SULD_2D_V2_base pattern = []> @@ -3752,20 +3682,13 @@ multiclass SULD_2D_V2 { def _I : SULD_2D_V2_base; } -defm SULD_2D_V2I8_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", B16>; -defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", B16>; -defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", B32>; -defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", B64>; - -defm SULD_2D_V2I8_TRAP : SULD_2D_V2<"suld.b.2d.v2.b8.trap", B16>; -defm SULD_2D_V2I16_TRAP : SULD_2D_V2<"suld.b.2d.v2.b16.trap", B16>; -defm SULD_2D_V2I32_TRAP : SULD_2D_V2<"suld.b.2d.v2.b32.trap", B32>; -defm SULD_2D_V2I64_TRAP : SULD_2D_V2<"suld.b.2d.v2.b64.trap", B64>; - -defm SULD_2D_V2I8_ZERO : SULD_2D_V2<"suld.b.2d.v2.b8.zero", B16>; -defm SULD_2D_V2I16_ZERO : SULD_2D_V2<"suld.b.2d.v2.b16.zero", B16>; -defm SULD_2D_V2I32_ZERO : SULD_2D_V2<"suld.b.2d.v2.b32.zero", B32>; -defm SULD_2D_V2I64_ZERO : SULD_2D_V2<"suld.b.2d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_2D_V2I8_ # op_upper : SULD_2D_V2<"suld.b.2d.v2.b8." # op, B16>; + defm SULD_2D_V2I16_ # op_upper : SULD_2D_V2<"suld.b.2d.v2.b16." # op, B16>; + defm SULD_2D_V2I32_ # op_upper : SULD_2D_V2<"suld.b.2d.v2.b32." # op, B32>; + defm SULD_2D_V2I64_ # op_upper : SULD_2D_V2<"suld.b.2d.v2.b64." # op, B64>; +} class SULD_2D_ARRAY_V2_base pattern = []> @@ -3782,20 +3705,13 @@ multiclass SULD_2D_ARRAY_V2 { def _I : SULD_2D_ARRAY_V2_base; } -defm SULD_2D_ARRAY_V2I8_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", B16>; -defm SULD_2D_ARRAY_V2I16_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", B16>; -defm SULD_2D_ARRAY_V2I32_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", B32>; -defm SULD_2D_ARRAY_V2I64_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", B64>; - -defm SULD_2D_ARRAY_V2I8_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", B16>; -defm SULD_2D_ARRAY_V2I16_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", B16>; -defm SULD_2D_ARRAY_V2I32_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", B32>; -defm SULD_2D_ARRAY_V2I64_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", B64>; - -defm SULD_2D_ARRAY_V2I8_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", B16>; -defm SULD_2D_ARRAY_V2I16_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", B16>; -defm SULD_2D_ARRAY_V2I32_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", B32>; -defm SULD_2D_ARRAY_V2I64_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_2D_ARRAY_V2I8_ # op_upper : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8." # op, B16>; + defm SULD_2D_ARRAY_V2I16_ # op_upper : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16." # op, B16>; + defm SULD_2D_ARRAY_V2I32_ # op_upper : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32." # op, B32>; + defm SULD_2D_ARRAY_V2I64_ # op_upper : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64." # op, B64>; +} class SULD_3D_V2_base pattern = []> @@ -3812,20 +3728,13 @@ multiclass SULD_3D_V2 { def _I : SULD_3D_V2_base; } -defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", B16>; -defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", B16>; -defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", B32>; -defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", B64>; - -defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", B16>; -defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", B16>; -defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", B32>; -defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", B64>; - -defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", B16>; -defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", B16>; -defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", B32>; -defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_3D_V2I8_ # op_upper : SULD_3D_V2<"suld.b.3d.v2.b8." # op, B16>; + defm SULD_3D_V2I16_ # op_upper : SULD_3D_V2<"suld.b.3d.v2.b16." # op, B16>; + defm SULD_3D_V2I32_ # op_upper : SULD_3D_V2<"suld.b.3d.v2.b32." # op, B32>; + defm SULD_3D_V2I64_ # op_upper : SULD_3D_V2<"suld.b.3d.v2.b64." # op, B64>; +} } @@ -3846,17 +3755,12 @@ multiclass SULD_1D_V4 { def _I : SULD_1D_V4_base; } -defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", B16>; -defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", B16>; -defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", B32>; - -defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", B16>; -defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", B16>; -defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", B32>; - -defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", B16>; -defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", B16>; -defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_1D_V4I8_ # op_upper : SULD_1D_V4<"suld.b.1d.v4.b8." # op, B16>; + defm SULD_1D_V4I16_ # op_upper : SULD_1D_V4<"suld.b.1d.v4.b16." # op, B16>; + defm SULD_1D_V4I32_ # op_upper : SULD_1D_V4<"suld.b.1d.v4.b32." # op, B32>; +} class SULD_1D_ARRAY_V4_base pattern = []> @@ -3874,17 +3778,12 @@ multiclass SULD_1D_ARRAY_V4 { def _I : SULD_1D_ARRAY_V4_base; } -defm SULD_1D_ARRAY_V4I8_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", B16>; -defm SULD_1D_ARRAY_V4I16_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", B16>; -defm SULD_1D_ARRAY_V4I32_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", B32>; - -defm SULD_1D_ARRAY_V4I8_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", B16>; -defm SULD_1D_ARRAY_V4I16_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", B16>; -defm SULD_1D_ARRAY_V4I32_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", B32>; - -defm SULD_1D_ARRAY_V4I8_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", B16>; -defm SULD_1D_ARRAY_V4I16_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", B16>; -defm SULD_1D_ARRAY_V4I32_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_1D_ARRAY_V4I8_ # op_upper : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8." # op, B16>; + defm SULD_1D_ARRAY_V4I16_ # op_upper : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16." # op, B16>; + defm SULD_1D_ARRAY_V4I32_ # op_upper : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32." # op, B32>; +} class SULD_2D_V4_base pattern = []> @@ -3901,17 +3800,12 @@ multiclass SULD_2D_V4 { def _I : SULD_2D_V4_base; } -defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", B16>; -defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", B16>; -defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", B32>; - -defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", B16>; -defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", B16>; -defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", B32>; - -defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", B16>; -defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", B16>; -defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_2D_V4I8_ # op_upper : SULD_2D_V4<"suld.b.2d.v4.b8." # op, B16>; + defm SULD_2D_V4I16_ # op_upper : SULD_2D_V4<"suld.b.2d.v4.b16." # op, B16>; + defm SULD_2D_V4I32_ # op_upper : SULD_2D_V4<"suld.b.2d.v4.b32." # op, B32>; +} class SULD_2D_ARRAY_V4_base pattern = []> @@ -3929,17 +3823,12 @@ multiclass SULD_2D_ARRAY_V4 { def _I : SULD_2D_ARRAY_V4_base; } -defm SULD_2D_ARRAY_V4I8_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", B16>; -defm SULD_2D_ARRAY_V4I16_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", B16>; -defm SULD_2D_ARRAY_V4I32_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", B32>; - -defm SULD_2D_ARRAY_V4I8_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", B16>; -defm SULD_2D_ARRAY_V4I16_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", B16>; -defm SULD_2D_ARRAY_V4I32_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", B32>; - -defm SULD_2D_ARRAY_V4I8_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", B16>; -defm SULD_2D_ARRAY_V4I16_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", B16>; -defm SULD_2D_ARRAY_V4I32_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_2D_ARRAY_V4I8_ # op_upper : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8." # op, B16>; + defm SULD_2D_ARRAY_V4I16_ # op_upper : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16." # op, B16>; + defm SULD_2D_ARRAY_V4I32_ # op_upper : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32." # op, B32>; +} class SULD_3D_V4_base pattern = []> @@ -3956,17 +3845,12 @@ multiclass SULD_3D_V4 { def _I : SULD_3D_V4_base; } -defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", B16>; -defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", B16>; -defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", B32>; - -defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", B16>; -defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", B16>; -defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", B32>; - -defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", B16>; -defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", B16>; -defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SULD_3D_V4I8_ # op_upper : SULD_3D_V4<"suld.b.3d.v4.b8." # op, B16>; + defm SULD_3D_V4I16_ # op_upper : SULD_3D_V4<"suld.b.3d.v4.b16." # op, B16>; + defm SULD_3D_V4I32_ # op_upper : SULD_3D_V4<"suld.b.3d.v4.b32." # op, B32>; +} } @@ -4037,20 +3921,13 @@ multiclass SUST_1D { def _I : SUST_1D_base; } -defm SUST_B_1D_I8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", B16>; -defm SUST_B_1D_I16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", B16>; -defm SUST_B_1D_I32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", B32>; -defm SUST_B_1D_I64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", B64>; - -defm SUST_B_1D_I8_TRAP : SUST_1D<"sust.b.1d.b8.trap", B16>; -defm SUST_B_1D_I16_TRAP : SUST_1D<"sust.b.1d.b16.trap", B16>; -defm SUST_B_1D_I32_TRAP : SUST_1D<"sust.b.1d.b32.trap", B32>; -defm SUST_B_1D_I64_TRAP : SUST_1D<"sust.b.1d.b64.trap", B64>; - -defm SUST_B_1D_I8_ZERO : SUST_1D<"sust.b.1d.b8.zero", B16>; -defm SUST_B_1D_I16_ZERO : SUST_1D<"sust.b.1d.b16.zero", B16>; -defm SUST_B_1D_I32_ZERO : SUST_1D<"sust.b.1d.b32.zero", B32>; -defm SUST_B_1D_I64_ZERO : SUST_1D<"sust.b.1d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_1D_I8_ # op_upper : SUST_1D<"sust.b.1d.b8." # op, B16>; + defm SUST_B_1D_I16_ # op_upper : SUST_1D<"sust.b.1d.b16." # op, B16>; + defm SUST_B_1D_I32_ # op_upper : SUST_1D<"sust.b.1d.b32." # op, B32>; + defm SUST_B_1D_I64_ # op_upper : SUST_1D<"sust.b.1d.b64." # op, B64>; +} defm SUST_P_1D_I8_TRAP : SUST_1D<"sust.p.1d.b8.trap", B16>; defm SUST_P_1D_I16_TRAP : SUST_1D<"sust.p.1d.b16.trap", B16>; @@ -4068,23 +3945,13 @@ multiclass SUST_1D_V2 { def _I : SUST_1D_V2_base; } -// int_nvvm_sust_b_1d_v2i8_clamp - -defm SUST_B_1D_V2I8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", B16>; -defm SUST_B_1D_V2I16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", B16>; -defm SUST_B_1D_V2I32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", B32>; -defm SUST_B_1D_V2I64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", B64>; - -defm SUST_B_1D_V2I8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", B16>; -defm SUST_B_1D_V2I16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", B16>; -defm SUST_B_1D_V2I32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", B32>; -defm SUST_B_1D_V2I64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", B64>; - -defm SUST_B_1D_V2I8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", B16>; -defm SUST_B_1D_V2I16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", B16>; -defm SUST_B_1D_V2I32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", B32>; -defm SUST_B_1D_V2I64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", B64>; - +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_1D_V2I8_ # op_upper : SUST_1D_V2<"sust.b.1d.v2.b8." # op, B16>; + defm SUST_B_1D_V2I16_ # op_upper : SUST_1D_V2<"sust.b.1d.v2.b16." # op, B16>; + defm SUST_B_1D_V2I32_ # op_upper : SUST_1D_V2<"sust.b.1d.v2.b32." # op, B32>; + defm SUST_B_1D_V2I64_ # op_upper : SUST_1D_V2<"sust.b.1d.v2.b64." # op, B64>; +} defm SUST_P_1D_V2I8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", B16>; defm SUST_P_1D_V2I16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", B16>; defm SUST_P_1D_V2I32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", B32>; @@ -4103,17 +3970,12 @@ multiclass SUST_1D_V4 { def _I : SUST_1D_V4_base; } -defm SUST_B_1D_V4I8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", B16>; -defm SUST_B_1D_V4I16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", B16>; -defm SUST_B_1D_V4I32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", B32>; - -defm SUST_B_1D_V4I8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", B16>; -defm SUST_B_1D_V4I16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", B16>; -defm SUST_B_1D_V4I32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", B32>; - -defm SUST_B_1D_V4I8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", B16>; -defm SUST_B_1D_V4I16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", B16>; -defm SUST_B_1D_V4I32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_1D_V4I8_ # op_upper : SUST_1D_V4<"sust.b.1d.v4.b8." # op, B16>; + defm SUST_B_1D_V4I16_ # op_upper : SUST_1D_V4<"sust.b.1d.v4.b16." # op, B16>; + defm SUST_B_1D_V4I32_ # op_upper : SUST_1D_V4<"sust.b.1d.v4.b32." # op, B32>; +} defm SUST_P_1D_V4I8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", B16>; defm SUST_P_1D_V4I16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", B16>; @@ -4131,20 +3993,13 @@ multiclass SUST_1D_ARRAY { def _I : SUST_1D_ARRAY_base; } -defm SUST_B_1D_ARRAY_I8_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", B16>; -defm SUST_B_1D_ARRAY_I16_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", B16>; -defm SUST_B_1D_ARRAY_I32_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", B32>; -defm SUST_B_1D_ARRAY_I64_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", B64>; - -defm SUST_B_1D_ARRAY_I8_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", B16>; -defm SUST_B_1D_ARRAY_I16_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", B16>; -defm SUST_B_1D_ARRAY_I32_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", B32>; -defm SUST_B_1D_ARRAY_I64_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", B64>; - -defm SUST_B_1D_ARRAY_I8_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", B16>; -defm SUST_B_1D_ARRAY_I16_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", B16>; -defm SUST_B_1D_ARRAY_I32_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", B32>; -defm SUST_B_1D_ARRAY_I64_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_1D_ARRAY_I8_ # op_upper : SUST_1D_ARRAY<"sust.b.a1d.b8." # op, B16>; + defm SUST_B_1D_ARRAY_I16_ # op_upper : SUST_1D_ARRAY<"sust.b.a1d.b16." # op, B16>; + defm SUST_B_1D_ARRAY_I32_ # op_upper : SUST_1D_ARRAY<"sust.b.a1d.b32." # op, B32>; + defm SUST_B_1D_ARRAY_I64_ # op_upper : SUST_1D_ARRAY<"sust.b.a1d.b64." # op, B64>; +} defm SUST_P_1D_ARRAY_I8_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", B16>; defm SUST_P_1D_ARRAY_I16_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", B16>; @@ -4164,20 +4019,13 @@ multiclass SUST_1D_ARRAY_V2 { def _I : SUST_1D_ARRAY_V2_base; } -defm SUST_B_1D_ARRAY_V2I8_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", B16>; -defm SUST_B_1D_ARRAY_V2I16_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", B16>; -defm SUST_B_1D_ARRAY_V2I32_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", B32>; -defm SUST_B_1D_ARRAY_V2I64_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", B64>; - -defm SUST_B_1D_ARRAY_V2I8_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", B16>; -defm SUST_B_1D_ARRAY_V2I16_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", B16>; -defm SUST_B_1D_ARRAY_V2I32_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", B32>; -defm SUST_B_1D_ARRAY_V2I64_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", B64>; - -defm SUST_B_1D_ARRAY_V2I8_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", B16>; -defm SUST_B_1D_ARRAY_V2I16_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", B16>; -defm SUST_B_1D_ARRAY_V2I32_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", B32>; -defm SUST_B_1D_ARRAY_V2I64_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_1D_ARRAY_V2I8_ # op_upper : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8." # op, B16>; + defm SUST_B_1D_ARRAY_V2I16_ # op_upper : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16." # op, B16>; + defm SUST_B_1D_ARRAY_V2I32_ # op_upper : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32." # op, B32>; + defm SUST_B_1D_ARRAY_V2I64_ # op_upper : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64." # op, B64>; +} defm SUST_P_1D_ARRAY_V2I8_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", B16>; defm SUST_P_1D_ARRAY_V2I16_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", B16>; @@ -4197,33 +4045,16 @@ multiclass SUST_1D_ARRAY_V4 { def _I : SUST_1D_ARRAY_V4_base; } -defm SUST_B_1D_ARRAY_V4I8_CLAMP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", B16>; -defm SUST_B_1D_ARRAY_V4I16_CLAMP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", B16>; -defm SUST_B_1D_ARRAY_V4I32_CLAMP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", B32>; - -defm SUST_B_1D_ARRAY_V4I8_TRAP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", B16>; -defm SUST_B_1D_ARRAY_V4I16_TRAP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", B16>; -defm SUST_B_1D_ARRAY_V4I32_TRAP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", B32>; - -defm SUST_B_1D_ARRAY_V4I8_ZERO - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", B16>; -defm SUST_B_1D_ARRAY_V4I16_ZERO - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", B16>; -defm SUST_B_1D_ARRAY_V4I32_ZERO - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", B32>; - -defm SUST_P_1D_ARRAY_V4I8_TRAP - : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", B16>; -defm SUST_P_1D_ARRAY_V4I16_TRAP - : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", B16>; -defm SUST_P_1D_ARRAY_V4I32_TRAP - : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_1D_ARRAY_V4I8_ # op_upper : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8." # op, B16>; + defm SUST_B_1D_ARRAY_V4I16_ # op_upper : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16." # op, B16>; + defm SUST_B_1D_ARRAY_V4I32_ # op_upper : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32." # op, B32>; +} + +defm SUST_P_1D_ARRAY_V4I8_TRAP : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", B16>; +defm SUST_P_1D_ARRAY_V4I16_TRAP : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", B16>; +defm SUST_P_1D_ARRAY_V4I32_TRAP : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", B32>; class SUST_2D_base pat> : NVPTXInst<(outs), @@ -4237,20 +4068,13 @@ multiclass SUST_2D { def _I : SUST_2D_base; } -defm SUST_B_2D_I8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", B16>; -defm SUST_B_2D_I16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", B16>; -defm SUST_B_2D_I32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", B32>; -defm SUST_B_2D_I64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", B64>; - -defm SUST_B_2D_I8_TRAP : SUST_2D<"sust.b.2d.b8.trap", B16>; -defm SUST_B_2D_I16_TRAP : SUST_2D<"sust.b.2d.b16.trap", B16>; -defm SUST_B_2D_I32_TRAP : SUST_2D<"sust.b.2d.b32.trap", B32>; -defm SUST_B_2D_I64_TRAP : SUST_2D<"sust.b.2d.b64.trap", B64>; - -defm SUST_B_2D_I8_ZERO : SUST_2D<"sust.b.2d.b8.zero", B16>; -defm SUST_B_2D_I16_ZERO : SUST_2D<"sust.b.2d.b16.zero", B16>; -defm SUST_B_2D_I32_ZERO : SUST_2D<"sust.b.2d.b32.zero", B32>; -defm SUST_B_2D_I64_ZERO : SUST_2D<"sust.b.2d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_2D_I8_ # op_upper : SUST_2D<"sust.b.2d.b8." # op, B16>; + defm SUST_B_2D_I16_ # op_upper : SUST_2D<"sust.b.2d.b16." # op, B16>; + defm SUST_B_2D_I32_ # op_upper : SUST_2D<"sust.b.2d.b32." # op, B32>; + defm SUST_B_2D_I64_ # op_upper : SUST_2D<"sust.b.2d.b64." # op, B64>; +} defm SUST_P_2D_I8_TRAP : SUST_2D<"sust.p.2d.b8.trap", B16>; defm SUST_P_2D_I16_TRAP : SUST_2D<"sust.p.2d.b16.trap", B16>; @@ -4270,20 +4094,13 @@ multiclass SUST_2D_V2 { def _I : SUST_2D_V2_base; } -defm SUST_B_2D_V2I8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", B16>; -defm SUST_B_2D_V2I16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", B16>; -defm SUST_B_2D_V2I32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", B32>; -defm SUST_B_2D_V2I64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", B64>; - -defm SUST_B_2D_V2I8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", B16>; -defm SUST_B_2D_V2I16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", B16>; -defm SUST_B_2D_V2I32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", B32>; -defm SUST_B_2D_V2I64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", B64>; - -defm SUST_B_2D_V2I8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", B16>; -defm SUST_B_2D_V2I16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", B16>; -defm SUST_B_2D_V2I32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", B32>; -defm SUST_B_2D_V2I64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_2D_V2I8_ # op_upper : SUST_2D_V2<"sust.b.2d.v2.b8." # op, B16>; + defm SUST_B_2D_V2I16_ # op_upper : SUST_2D_V2<"sust.b.2d.v2.b16." # op, B16>; + defm SUST_B_2D_V2I32_ # op_upper : SUST_2D_V2<"sust.b.2d.v2.b32." # op, B32>; + defm SUST_B_2D_V2I64_ # op_upper : SUST_2D_V2<"sust.b.2d.v2.b64." # op, B64>; +} defm SUST_P_2D_V2I8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", B16>; defm SUST_P_2D_V2I16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", B16>; @@ -4303,17 +4120,12 @@ multiclass SUST_2D_V4 { def _I : SUST_2D_V4_base; } -defm SUST_B_2D_V4I8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", B16>; -defm SUST_B_2D_V4I16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", B16>; -defm SUST_B_2D_V4I32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", B32>; - -defm SUST_B_2D_V4I8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", B16>; -defm SUST_B_2D_V4I16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", B16>; -defm SUST_B_2D_V4I32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", B32>; - -defm SUST_B_2D_V4I8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", B16>; -defm SUST_B_2D_V4I16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", B16>; -defm SUST_B_2D_V4I32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_2D_V4I8_ # op_upper : SUST_2D_V4<"sust.b.2d.v4.b8." # op, B16>; + defm SUST_B_2D_V4I16_ # op_upper : SUST_2D_V4<"sust.b.2d.v4.b16." # op, B16>; + defm SUST_B_2D_V4I32_ # op_upper : SUST_2D_V4<"sust.b.2d.v4.b32." # op, B32>; +} defm SUST_P_2D_V4I8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", B16>; defm SUST_P_2D_V4I16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", B16>; @@ -4333,20 +4145,13 @@ multiclass SUST_2D_ARRAY { def _I : SUST_2D_ARRAY_base; } -defm SUST_B_2D_ARRAY_I8_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", B16>; -defm SUST_B_2D_ARRAY_I16_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", B16>; -defm SUST_B_2D_ARRAY_I32_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", B32>; -defm SUST_B_2D_ARRAY_I64_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", B64>; - -defm SUST_B_2D_ARRAY_I8_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", B16>; -defm SUST_B_2D_ARRAY_I16_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", B16>; -defm SUST_B_2D_ARRAY_I32_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", B32>; -defm SUST_B_2D_ARRAY_I64_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", B64>; - -defm SUST_B_2D_ARRAY_I8_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", B16>; -defm SUST_B_2D_ARRAY_I16_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", B16>; -defm SUST_B_2D_ARRAY_I32_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", B32>; -defm SUST_B_2D_ARRAY_I64_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_2D_ARRAY_I8_ # op_upper : SUST_2D_ARRAY<"sust.b.a2d.b8." # op, B16>; + defm SUST_B_2D_ARRAY_I16_ # op_upper : SUST_2D_ARRAY<"sust.b.a2d.b16." # op, B16>; + defm SUST_B_2D_ARRAY_I32_ # op_upper : SUST_2D_ARRAY<"sust.b.a2d.b32." # op, B32>; + defm SUST_B_2D_ARRAY_I64_ # op_upper : SUST_2D_ARRAY<"sust.b.a2d.b64." # op, B64>; +} defm SUST_P_2D_ARRAY_I8_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", B16>; defm SUST_P_2D_ARRAY_I16_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", B16>; @@ -4366,20 +4171,13 @@ multiclass SUST_2D_ARRAY_V2 { def _I : SUST_2D_ARRAY_V2_base; } -defm SUST_B_2D_ARRAY_V2I8_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", B16>; -defm SUST_B_2D_ARRAY_V2I16_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", B16>; -defm SUST_B_2D_ARRAY_V2I32_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", B32>; -defm SUST_B_2D_ARRAY_V2I64_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", B64>; - -defm SUST_B_2D_ARRAY_V2I8_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", B16>; -defm SUST_B_2D_ARRAY_V2I16_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", B16>; -defm SUST_B_2D_ARRAY_V2I32_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", B32>; -defm SUST_B_2D_ARRAY_V2I64_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", B64>; - -defm SUST_B_2D_ARRAY_V2I8_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", B16>; -defm SUST_B_2D_ARRAY_V2I16_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", B16>; -defm SUST_B_2D_ARRAY_V2I32_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", B32>; -defm SUST_B_2D_ARRAY_V2I64_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", B64>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_2D_ARRAY_V2I8_ # op_upper : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8." # op, B16>; + defm SUST_B_2D_ARRAY_V2I16_ # op_upper : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16." # op, B16>; + defm SUST_B_2D_ARRAY_V2I32_ # op_upper : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32." # op, B32>; + defm SUST_B_2D_ARRAY_V2I64_ # op_upper : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64." # op, B64>; +} defm SUST_P_2D_ARRAY_V2I8_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", B16>; defm SUST_P_2D_ARRAY_V2I16_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", B16>; @@ -4399,17 +4197,12 @@ multiclass SUST_2D_ARRAY_V4 { def _I : SUST_2D_ARRAY_V4_base; } -defm SUST_B_2D_ARRAY_V4I8_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", B16>; -defm SUST_B_2D_ARRAY_V4I16_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", B16>; -defm SUST_B_2D_ARRAY_V4I32_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", B32>; - -defm SUST_B_2D_ARRAY_V4I8_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", B16>; -defm SUST_B_2D_ARRAY_V4I16_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", B16>; -defm SUST_B_2D_ARRAY_V4I32_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", B32>; - -defm SUST_B_2D_ARRAY_V4I8_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", B16>; -defm SUST_B_2D_ARRAY_V4I16_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", B16>; -defm SUST_B_2D_ARRAY_V4I32_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_2D_ARRAY_V4I8_ # op_upper : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8." # op, B16>; + defm SUST_B_2D_ARRAY_V4I16_ # op_upper : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16." # op, B16>; + defm SUST_B_2D_ARRAY_V4I32_ # op_upper : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32." # op, B32>; +} defm SUST_P_2D_ARRAY_V4I8_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", B16>; defm SUST_P_2D_ARRAY_V4I16_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", B16>; @@ -4429,21 +4222,13 @@ multiclass SUST_3D { def _I : SUST_3D_base; } -defm SUST_B_3D_I8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", B16>; -defm SUST_B_3D_I16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", B16>; -defm SUST_B_3D_I32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", B32>; -defm SUST_B_3D_I64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", B64>; - -defm SUST_B_3D_I8_TRAP : SUST_3D<"sust.b.3d.b8.trap", B16>; -defm SUST_B_3D_I16_TRAP : SUST_3D<"sust.b.3d.b16.trap", B16>; -defm SUST_B_3D_I32_TRAP : SUST_3D<"sust.b.3d.b32.trap", B32>; -defm SUST_B_3D_I64_TRAP : SUST_3D<"sust.b.3d.b64.trap", B64>; - -defm SUST_B_3D_I8_ZERO : SUST_3D<"sust.b.3d.b8.zero", B16>; -defm SUST_B_3D_I16_ZERO : SUST_3D<"sust.b.3d.b16.zero", B16>; -defm SUST_B_3D_I32_ZERO : SUST_3D<"sust.b.3d.b32.zero", B32>; -defm SUST_B_3D_I64_ZERO : SUST_3D<"sust.b.3d.b64.zero", B64>; - +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_3D_I8_ # op_upper : SUST_3D<"sust.b.3d.b8." # op, B16>; + defm SUST_B_3D_I16_ # op_upper : SUST_3D<"sust.b.3d.b16." # op, B16>; + defm SUST_B_3D_I32_ # op_upper : SUST_3D<"sust.b.3d.b32." # op, B32>; + defm SUST_B_3D_I64_ # op_upper : SUST_3D<"sust.b.3d.b64." # op, B64>; +} defm SUST_P_3D_I8_TRAP : SUST_3D<"sust.p.3d.b8.trap", B16>; defm SUST_P_3D_I16_TRAP : SUST_3D<"sust.p.3d.b16.trap", B16>; defm SUST_P_3D_I32_TRAP : SUST_3D<"sust.p.3d.b32.trap", B32>; @@ -4462,21 +4247,13 @@ multiclass SUST_3D_V2 { def _I : SUST_3D_V2_base; } -defm SUST_B_3D_V2I8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", B16>; -defm SUST_B_3D_V2I16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", B16>; -defm SUST_B_3D_V2I32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", B32>; -defm SUST_B_3D_V2I64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", B64>; - -defm SUST_B_3D_V2I8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", B16>; -defm SUST_B_3D_V2I16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", B16>; -defm SUST_B_3D_V2I32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", B32>; -defm SUST_B_3D_V2I64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", B64>; - -defm SUST_B_3D_V2I8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", B16>; -defm SUST_B_3D_V2I16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", B16>; -defm SUST_B_3D_V2I32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", B32>; -defm SUST_B_3D_V2I64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", B64>; - +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_3D_V2I8_ # op_upper : SUST_3D_V2<"sust.b.3d.v2.b8." # op, B16>; + defm SUST_B_3D_V2I16_ # op_upper : SUST_3D_V2<"sust.b.3d.v2.b16." # op, B16>; + defm SUST_B_3D_V2I32_ # op_upper : SUST_3D_V2<"sust.b.3d.v2.b32." # op, B32>; + defm SUST_B_3D_V2I64_ # op_upper : SUST_3D_V2<"sust.b.3d.v2.b64." # op, B64>; +} defm SUST_P_3D_V2I8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", B16>; defm SUST_P_3D_V2I16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", B16>; defm SUST_P_3D_V2I32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", B32>; @@ -4495,17 +4272,12 @@ multiclass SUST_3D_V4 { def _I : SUST_3D_V4_base; } -defm SUST_B_3D_V4I8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", B16>; -defm SUST_B_3D_V4I16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", B16>; -defm SUST_B_3D_V4I32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", B32>; - -defm SUST_B_3D_V4I8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", B16>; -defm SUST_B_3D_V4I16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", B16>; -defm SUST_B_3D_V4I32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", B32>; - -defm SUST_B_3D_V4I8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", B16>; -defm SUST_B_3D_V4I16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", B16>; -defm SUST_B_3D_V4I32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", B32>; +foreach op = ["clamp", "trap", "zero"] in { + defvar op_upper = !toupper(op); + defm SUST_B_3D_V4I8_ # op_upper : SUST_3D_V4<"sust.b.3d.v4.b8." # op, B16>; + defm SUST_B_3D_V4I16_ # op_upper : SUST_3D_V4<"sust.b.3d.v4.b16." # op, B16>; + defm SUST_B_3D_V4I32_ # op_upper : SUST_3D_V4<"sust.b.3d.v4.b32." # op, B32>; +} defm SUST_P_3D_V4I8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", B16>; defm SUST_P_3D_V4I16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", B16>; @@ -5122,27 +4894,23 @@ defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_align // // WGMMA fence instructions // -let isConvergent = true in { -def INT_NVVM_WGMMA_FENCE_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins), "wgmma.fence.sync.aligned", - [(int_nvvm_wgmma_fence_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>; +let isConvergent = true, Predicates = [hasSM90a, hasPTX<80>] in { + def WGMMA_FENCE_SYNC_ALIGNED : NullaryInst<"wgmma.fence.sync.aligned", int_nvvm_wgmma_fence_sync_aligned>; -def INT_NVVM_WGMMA_COMMIT_GROUP_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins), "wgmma.commit_group.sync.aligned", - [(int_nvvm_wgmma_commit_group_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>; + def WGMMA_COMMIT_GROUP_SYNC_ALIGNED : NullaryInst<"wgmma.commit_group.sync.aligned", int_nvvm_wgmma_commit_group_sync_aligned>; -def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned", - [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>, Requires<[hasSM90a, hasPTX<80>]>; -} // isConvergent = true + def WGMMA_WAIT_GROUP_SYNC_ALIGNED : BasicNVPTXInst<(outs), (ins i64imm:$n), "wgmma.wait_group.sync.aligned", + [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>; +} let Predicates = [hasSM<90>, hasPTX<78>] in { def GRIDDEPCONTROL_LAUNCH_DEPENDENTS : - BasicNVPTXInst<(outs), (ins), "griddepcontrol.launch_dependents", - [(int_nvvm_griddepcontrol_launch_dependents)]>; + NullaryInst<"griddepcontrol.launch_dependents", int_nvvm_griddepcontrol_launch_dependents>; def GRIDDEPCONTROL_WAIT : - BasicNVPTXInst<(outs), (ins), "griddepcontrol.wait", - [(int_nvvm_griddepcontrol_wait)]>; + NullaryInst<"griddepcontrol.wait", int_nvvm_griddepcontrol_wait>; } -def INT_EXIT : BasicNVPTXInst<(outs), (ins), "exit", [(int_nvvm_exit)]>; +def EXIT : NullaryInst<"exit", int_nvvm_exit>; // Tcgen05 intrinsics let isConvergent = true, Predicates = [hasTcgen05Instructions] in { @@ -5170,9 +4938,7 @@ defm TCGEN05_DEALLOC_CG1: TCGEN05_DEALLOC_INTR<"1", int_nvvm_tcgen05_dealloc_cg1 defm TCGEN05_DEALLOC_CG2: TCGEN05_DEALLOC_INTR<"2", int_nvvm_tcgen05_dealloc_cg2>; multiclass TCGEN05_RELINQ_PERMIT_INTR { - def "" : BasicNVPTXInst<(outs), (ins), - "tcgen05.relinquish_alloc_permit.cta_group::" # num # ".sync.aligned", - [(Intr)]>; + def "" : NullaryInst<"tcgen05.relinquish_alloc_permit.cta_group::" # num # ".sync.aligned", Intr>; } defm TCGEN05_RELINQ_CG1: TCGEN05_RELINQ_PERMIT_INTR<"1", int_nvvm_tcgen05_relinq_alloc_permit_cg1>; defm TCGEN05_RELINQ_CG2: TCGEN05_RELINQ_PERMIT_INTR<"2", int_nvvm_tcgen05_relinq_alloc_permit_cg2>; diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir index ca6f49feff052..c9abe3faf228e 100644 --- a/llvm/test/CodeGen/NVPTX/branch-fold.mir +++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir @@ -57,7 +57,7 @@ body: | ; CHECK-NEXT: bb.2.bb1: ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:b64 = ADDi64ri [[ADDi64ri]], 1 + ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:b64 = ADD64ri [[ADDi64ri]], 1 ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:b1 = SETP_i64ri [[ADDi64ri]], 1, 2 ; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2 ; CHECK-NEXT: {{ $}} @@ -76,7 +76,7 @@ body: | bb.2.bb1: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - %5:b64 = ADDi64ri %5, 1 + %5:b64 = ADD64ri %5, 1 %4:b1 = SETP_i64ri %5, 1, 2 CBranch %4, %bb.2