diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 9af6fb2cb198e..38912a7f09e30 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -215,15 +215,15 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { // Encode the register class in the upper 4 bits // Must be kept in sync with NVPTXInstPrinter::printRegName unsigned Ret = 0; - if (RC == &NVPTX::Int1RegsRegClass) { + if (RC == &NVPTX::B1RegClass) { Ret = (1 << 28); - } else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::B16RegClass) { Ret = (2 << 28); - } else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::B32RegClass) { Ret = (3 << 28); - } else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::B64RegClass) { Ret = (4 << 28); - } else if (RC == &NVPTX::Int128RegsRegClass) { + } else if (RC == &NVPTX::B128RegClass) { Ret = (7 << 28); } else { report_fatal_error("Bad register class"); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 492f4ab76fdbb..676654d6d33e7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -589,18 +589,18 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(Op, VT, IsOpSupported ? Action : NoI16x2Action); }; - addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); - addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); - addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass); - addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass); - addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); - addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); - addRegisterClass(MVT::f32, &NVPTX::Int32RegsRegClass); - addRegisterClass(MVT::f64, &NVPTX::Int64RegsRegClass); - addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass); - addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass); - addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass); - addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass); + addRegisterClass(MVT::i1, &NVPTX::B1RegClass); + addRegisterClass(MVT::i16, &NVPTX::B16RegClass); + addRegisterClass(MVT::v2i16, &NVPTX::B32RegClass); + addRegisterClass(MVT::v4i8, &NVPTX::B32RegClass); + addRegisterClass(MVT::i32, &NVPTX::B32RegClass); + addRegisterClass(MVT::i64, &NVPTX::B64RegClass); + addRegisterClass(MVT::f32, &NVPTX::B32RegClass); + addRegisterClass(MVT::f64, &NVPTX::B64RegClass); + addRegisterClass(MVT::f16, &NVPTX::B16RegClass); + addRegisterClass(MVT::v2f16, &NVPTX::B32RegClass); + addRegisterClass(MVT::bf16, &NVPTX::B16RegClass); + addRegisterClass(MVT::v2bf16, &NVPTX::B32RegClass); // Conversion to/from FP16/FP16x2 is always legal. setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); @@ -4866,22 +4866,22 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'b': - return std::make_pair(0U, &NVPTX::Int1RegsRegClass); + return std::make_pair(0U, &NVPTX::B1RegClass); case 'c': case 'h': - return std::make_pair(0U, &NVPTX::Int16RegsRegClass); + return std::make_pair(0U, &NVPTX::B16RegClass); case 'r': case 'f': - return std::make_pair(0U, &NVPTX::Int32RegsRegClass); + return std::make_pair(0U, &NVPTX::B32RegClass); case 'l': case 'N': case 'd': - return std::make_pair(0U, &NVPTX::Int64RegsRegClass); + return std::make_pair(0U, &NVPTX::B64RegClass); case 'q': { if (STI.getSmVersion() < 70) report_fatal_error("Inline asm with 128 bit operands is only " "supported for sm_70 and higher!"); - return std::make_pair(0U, &NVPTX::Int128RegsRegClass); + return std::make_pair(0U, &NVPTX::B128RegClass); } } } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index f262a0fb66c25..bf84d1dca4ed5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -39,15 +39,15 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, report_fatal_error("Copy one register into another with a different width"); unsigned Op; - if (DestRC == &NVPTX::Int1RegsRegClass) { + if (DestRC == &NVPTX::B1RegClass) { Op = NVPTX::IMOV1r; - } else if (DestRC == &NVPTX::Int16RegsRegClass) { + } else if (DestRC == &NVPTX::B16RegClass) { Op = NVPTX::MOV16r; - } else if (DestRC == &NVPTX::Int32RegsRegClass) { + } else if (DestRC == &NVPTX::B32RegClass) { Op = NVPTX::IMOV32r; - } else if (DestRC == &NVPTX::Int64RegsRegClass) { + } else if (DestRC == &NVPTX::B64RegClass) { Op = NVPTX::IMOV64r; - } else if (DestRC == &NVPTX::Int128RegsRegClass) { + } else if (DestRC == &NVPTX::B128RegClass) { Op = NVPTX::IMOV128r; } else { llvm_unreachable("Bad register copy"); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index bbe99dec5c445..5979054764647 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -170,29 +170,6 @@ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">; -// Helper class to aid conversion between ValueType and a matching RegisterClass. - -class ValueToRegClass { - string name = !cast(T); - NVPTXRegClass ret = !cond( - !eq(name, "i1"): Int1Regs, - !eq(name, "i16"): Int16Regs, - !eq(name, "v2i16"): Int32Regs, - !eq(name, "i32"): Int32Regs, - !eq(name, "i64"): Int64Regs, - !eq(name, "f16"): Int16Regs, - !eq(name, "v2f16"): Int32Regs, - !eq(name, "bf16"): Int16Regs, - !eq(name, "v2bf16"): Int32Regs, - !eq(name, "f32"): Float32Regs, - !eq(name, "f64"): Float64Regs, - !eq(name, "ai32"): Int32ArgRegs, - !eq(name, "ai64"): Int64ArgRegs, - !eq(name, "af32"): Float32ArgRegs, - !eq(name, "if64"): Float64ArgRegs, - ); -} - //===----------------------------------------------------------------------===// // Some Common Instruction Class Templates @@ -219,18 +196,18 @@ class RegTyInfo; -def I16RT : RegTyInfo; -def I32RT : RegTyInfo; -def I64RT : RegTyInfo; +def I1RT : RegTyInfo; +def I16RT : RegTyInfo; +def I32RT : RegTyInfo; +def I64RT : RegTyInfo; -def F32RT : RegTyInfo; -def F64RT : RegTyInfo; -def F16RT : RegTyInfo; -def BF16RT : RegTyInfo; +def F32RT : RegTyInfo; +def F64RT : RegTyInfo; +def F16RT : RegTyInfo; +def BF16RT : RegTyInfo; -def F16X2RT : RegTyInfo; -def BF16X2RT : RegTyInfo; +def F16X2RT : RegTyInfo; +def BF16X2RT : RegTyInfo; // This class provides a basic wrapper around an NVPTXInst that abstracts the @@ -238,18 +215,18 @@ def BF16X2RT : RegTyInfo; // construction of the asm string based on the provided dag arguments. // For example, the following asm-strings would be computed: // -// * BasicFlagsNVPTXInst<(outs Int32Regs:$dst), -// (ins Int32Regs:$a, Int32Regs:$b), (ins), +// * BasicFlagsNVPTXInst<(outs B32:$dst), +// (ins B32:$a, B32:$b), (ins), // "add.s32">; // ---> "add.s32 \t$dst, $a, $b;" // -// * BasicFlagsNVPTXInst<(outs Int32Regs:$d), -// (ins Int32Regs:$a, Int32Regs:$b, Hexu32imm:$c), +// * BasicFlagsNVPTXInst<(outs B32:$d), +// (ins B32:$a, B32:$b, Hexu32imm:$c), // (ins PrmtMode:$mode), // "prmt.b32${mode}">; // ---> "prmt.b32${mode} \t$d, $a, $b, $c;" // -// * BasicFlagsNVPTXInst<(outs Int64Regs:$state), +// * BasicFlagsNVPTXInst<(outs B64:$state), // (ins ADDR:$addr), // "mbarrier.arrive.b64">; // ---> "mbarrier.arrive.b64 \t$state, [$addr];" @@ -312,7 +289,7 @@ multiclass I3 { } class I16x2 : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), OpcStr # "16x2", [(set v2i16:$dst, (OpNode v2i16:$a, v2i16:$b))]>, Requires<[hasPTX<80>, hasSM<90>]>; @@ -334,73 +311,73 @@ multiclass ADD_SUB_INT_CARRY { multiclass FMINIMUMMAXIMUM { if !not(NaN) then { def f64rr : - BasicNVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), + BasicNVPTXInst<(outs B64:$dst), + (ins B64:$a, B64:$b), OpcStr # ".f64", [(set f64:$dst, (OpNode f64:$a, f64:$b))]>; def f64ri : - BasicNVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), + BasicNVPTXInst<(outs B64:$dst), + (ins B64:$a, f64imm:$b), OpcStr # ".f64", [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>; } def f32rr_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), OpcStr # ".ftz.f32", [(set f32:$dst, (OpNode f32:$a, f32:$b))]>, Requires<[doF32FTZ]>; def f32ri_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), OpcStr # ".ftz.f32", [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>, Requires<[doF32FTZ]>; def f32rr : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), OpcStr # ".f32", [(set f32:$dst, (OpNode f32:$a, f32:$b))]>; def f32ri : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), OpcStr # ".f32", [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>; def f16rr_ftz : - BasicNVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), + BasicNVPTXInst<(outs B16:$dst), + (ins B16:$a, B16:$b), OpcStr # ".ftz.f16", [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, doF32FTZ]>; def f16rr : - BasicNVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), + BasicNVPTXInst<(outs B16:$dst), + (ins B16:$a, B16:$b), OpcStr # ".f16", [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>; def f16x2rr_ftz : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), OpcStr # ".ftz.f16x2", [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>; def f16x2rr : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), OpcStr # ".f16x2", [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>; def bf16rr : - BasicNVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), + BasicNVPTXInst<(outs B16:$dst), + (ins B16:$a, B16:$b), OpcStr # ".bf16", [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>, Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; def bf16x2rr : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), OpcStr # ".bf16x2", [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>, Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; @@ -417,73 +394,73 @@ multiclass FMINIMUMMAXIMUM { // just like the non ".rn" op, but prevents ptxas from creating FMAs. multiclass F3 { def f64rr : - BasicNVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), + BasicNVPTXInst<(outs B64:$dst), + (ins B64:$a, B64:$b), op_str # ".f64", [(set f64:$dst, (op_pat f64:$a, f64:$b))]>; def f64ri : - BasicNVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), + BasicNVPTXInst<(outs B64:$dst), + (ins B64:$a, f64imm:$b), op_str # ".f64", [(set f64:$dst, (op_pat f64:$a, fpimm:$b))]>; def f32rr_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), op_str # ".ftz.f32", [(set f32:$dst, (op_pat f32:$a, f32:$b))]>, Requires<[doF32FTZ]>; def f32ri_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), op_str # ".ftz.f32", [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>, Requires<[doF32FTZ]>; def f32rr : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), op_str # ".f32", [(set f32:$dst, (op_pat f32:$a, f32:$b))]>; def f32ri : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), op_str # ".f32", [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>; def f16rr_ftz : - BasicNVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), + BasicNVPTXInst<(outs B16:$dst), + (ins B16:$a, B16:$b), op_str # ".ftz.f16", [(set f16:$dst, (op_pat f16:$a, f16:$b))]>, Requires<[useFP16Math, doF32FTZ]>; def f16rr : - BasicNVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), + BasicNVPTXInst<(outs B16:$dst), + (ins B16:$a, B16:$b), op_str # ".f16", [(set f16:$dst, (op_pat f16:$a, f16:$b))]>, Requires<[useFP16Math]>; def f16x2rr_ftz : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), op_str # ".ftz.f16x2", [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, doF32FTZ]>; def f16x2rr : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), op_str # ".f16x2", [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math]>; def bf16rr : - BasicNVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), + BasicNVPTXInst<(outs B16:$dst), + (ins B16:$a, B16:$b), op_str # ".bf16", [(set bf16:$dst, (op_pat bf16:$a, bf16:$b))]>, Requires<[hasBF16Math]>; def bf16x2rr : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), op_str # ".bf16x2", [(set v2bf16:$dst, (op_pat v2bf16:$a, v2bf16:$b))]>, Requires<[hasBF16Math]>; @@ -504,40 +481,40 @@ multiclass F3_fma_component { // instructions: .f64, .f32, and .ftz.f32 (flush // subnormal inputs and results to zero). multiclass F2 { - def f64 : BasicNVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), + def f64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a), OpcStr # ".f64", [(set f64:$dst, (OpNode f64:$a))]>; - def f32_ftz : BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + def f32_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), OpcStr # ".ftz.f32", [(set f32:$dst, (OpNode f32:$a))]>, Requires<[doF32FTZ]>; - def f32 : BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + def f32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), OpcStr # ".f32", [(set f32:$dst, (OpNode f32:$a))]>; } multiclass F2_Support_Half { - def bf16 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a), + def bf16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a), OpcStr # ".bf16", [(set bf16:$dst, (OpNode bf16:$a))]>, Requires<[hasSM<80>, hasPTX<70>]>; - def bf16x2 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), + def bf16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), OpcStr # ".bf16x2", [(set v2bf16:$dst, (OpNode v2bf16:$a))]>, Requires<[hasSM<80>, hasPTX<70>]>; - def f16_ftz : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a), + def f16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a), OpcStr # ".ftz.f16", [(set f16:$dst, (OpNode f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>; - def f16x2_ftz : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), + def f16x2_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), OpcStr # ".ftz.f16x2", [(set v2f16:$dst, (OpNode v2f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>; - def f16 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a), + def f16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a), OpcStr # ".f16", [(set f16:$dst, (OpNode f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>]>; - def f16x2 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), + def f16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), OpcStr # ".f16x2", [(set v2f16:$dst, (OpNode v2f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>]>; @@ -546,11 +523,11 @@ multiclass F2_Support_Half { // Variant where only .ftz.bf16 is supported. multiclass F2_Support_Half_BF { - def bf16_ftz : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a), + def bf16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a), OpcStr # ".ftz.bf16", [(set bf16:$dst, (OpNode bf16:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; - def bf16x2_ftz: BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), + def bf16x2_ftz: BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), OpcStr # ".ftz.bf16x2", [(set v2bf16:$dst, (OpNode v2bf16:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; @@ -571,52 +548,52 @@ let hasSideEffects = false in { multiclass CVT_FROM_ALL Preds = []> { def _s8 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s8">, Requires; def _u8 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u8">, Requires; def _s16 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s16">, Requires; def _u16 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u16">, Requires; def _s32 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int32Regs:$src), (ins CvtMode:$mode), + (ins B32:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s32">, Requires; def _u32 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int32Regs:$src), (ins CvtMode:$mode), + (ins B32:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u32">, Requires; def _s64 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int64Regs:$src), (ins CvtMode:$mode), + (ins B64:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s64">, Requires; def _u64 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int64Regs:$src), (ins CvtMode:$mode), + (ins B64:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u64">, Requires; def _f16 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".f16">, Requires; def _bf16 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}." # ToType # ".bf16">, Requiresf32 was introduced early. @@ -625,7 +602,7 @@ let hasSideEffects = false in { [hasPTX<78>, hasSM<90>])>; def _f32 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Float32Regs:$src), (ins CvtMode:$mode), + (ins B32:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}." # ToType # ".f32">, Requiresbf16 was introduced early. @@ -633,61 +610,61 @@ let hasSideEffects = false in { Preds)>; def _f64 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Float64Regs:$src), (ins CvtMode:$mode), + (ins B64:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".f64">, Requires; } // Generate cvts from all types to all types. - defm CVT_s8 : CVT_FROM_ALL<"s8", Int16Regs>; - defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>; - defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; - defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; - defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; - defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; - defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; - defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; - defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; - defm CVT_bf16 : CVT_FROM_ALL<"bf16", Int16Regs, [hasPTX<78>, hasSM<90>]>; - defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; - defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; + defm CVT_s8 : CVT_FROM_ALL<"s8", B16>; + defm CVT_u8 : CVT_FROM_ALL<"u8", B16>; + defm CVT_s16 : CVT_FROM_ALL<"s16", B16>; + defm CVT_u16 : CVT_FROM_ALL<"u16", B16>; + defm CVT_s32 : CVT_FROM_ALL<"s32", B32>; + defm CVT_u32 : CVT_FROM_ALL<"u32", B32>; + defm CVT_s64 : CVT_FROM_ALL<"s64", B64>; + defm CVT_u64 : CVT_FROM_ALL<"u64", B64>; + defm CVT_f16 : CVT_FROM_ALL<"f16", B16>; + defm CVT_bf16 : CVT_FROM_ALL<"bf16", B16, [hasPTX<78>, hasSM<90>]>; + defm CVT_f32 : CVT_FROM_ALL<"f32", B32>; + defm CVT_f64 : CVT_FROM_ALL<"f64", B64>; // These cvts are different from those above: The source and dest registers // are of the same type. - def CVT_INREG_s16_s8 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + def CVT_INREG_s16_s8 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$src), "cvt.s16.s8">; - def CVT_INREG_s32_s8 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + def CVT_INREG_s32_s8 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "cvt.s32.s8">; - def CVT_INREG_s32_s16 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + def CVT_INREG_s32_s16 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "cvt.s32.s16">; - def CVT_INREG_s64_s8 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + def CVT_INREG_s64_s8 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src), "cvt.s64.s8">; - def CVT_INREG_s64_s16 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + def CVT_INREG_s64_s16 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src), "cvt.s64.s16">; - def CVT_INREG_s64_s32 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + def CVT_INREG_s64_s32 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src), "cvt.s64.s32">; multiclass CVT_FROM_FLOAT_V2_SM80 { def _f32 : BasicFlagsNVPTXInst<(outs RC:$dst), - (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode), + (ins B32:$src1, B32:$src2), (ins CvtMode:$mode), "cvt${mode:base}${mode:relu}." # FromName # ".f32">, Requires<[hasPTX<70>, hasSM<80>]>; } - defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>; - defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>; + defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", B32>; + defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", B32>; // FP8 conversions. multiclass CVT_TO_F8X2 { def _f32 : - BasicFlagsNVPTXInst<(outs Int16Regs:$dst), - (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode), + BasicFlagsNVPTXInst<(outs B16:$dst), + (ins B32:$src1, B32:$src2), (ins CvtMode:$mode), "cvt${mode:base}.satfinite${mode:relu}." # F8Name # "x2.f32">, Requires<[hasPTX<81>, hasSM<89>]>; def _f16x2 : - BasicFlagsNVPTXInst<(outs Int16Regs:$dst), - (ins Int32Regs:$src), (ins CvtMode:$mode), + BasicFlagsNVPTXInst<(outs B16:$dst), + (ins B32:$src), (ins CvtMode:$mode), "cvt${mode:base}.satfinite${mode:relu}." # F8Name # "x2.f16x2">, Requires<[hasPTX<81>, hasSM<89>]>; } @@ -696,8 +673,8 @@ let hasSideEffects = false in { defm CVT_e5m2x2 : CVT_TO_F8X2<"e5m2">; class CVT_f16x2_fp8 : - BasicFlagsNVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + BasicFlagsNVPTXInst<(outs B32:$dst), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:relu}.f16x2." # F8Name # "x2">, Requires<[hasPTX<81>, hasSM<89>]>; @@ -708,7 +685,7 @@ let hasSideEffects = false in { multiclass CVT_TO_TF32 Preds = [hasPTX<78>, hasSM<90>]> { defvar Intr = !cast("int_nvvm_f2tf32_" # !subst(".", "_", Modifier)); - def NAME : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$src), + def NAME : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "cvt." # Modifier # ".tf32.f32", [(set i32:$dst, (Intr f32:$src))]>, Requires; @@ -728,25 +705,25 @@ let hasSideEffects = false in { // FP6 conversions. foreach type = ["e2m3x2", "e3m2x2"] in { - def CVT_ # type # _f32_sf : BasicFlagsNVPTXInst<(outs Int16Regs:$dst), - (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode), + def CVT_ # type # _f32_sf : BasicFlagsNVPTXInst<(outs B16:$dst), + (ins B32:$src1, B32:$src2), (ins CvtMode:$mode), "cvt${mode:base}.satfinite${mode:relu}." # type # ".f32">; - def CVT_f16x2_ # type : BasicFlagsNVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$src), (ins CvtMode:$mode), + def CVT_f16x2_ # type : BasicFlagsNVPTXInst<(outs B32:$dst), + (ins B16:$src), (ins CvtMode:$mode), "cvt${mode:base}${mode:relu}.f16x2." # type>; } // FP4 conversions. - def CVT_e2m1x2_f32_sf : NVPTXInst<(outs Int16Regs:$dst), - (ins Float32Regs:$src1, Float32Regs:$src2, CvtMode:$mode), + def CVT_e2m1x2_f32_sf : NVPTXInst<(outs B16:$dst), + (ins B32:$src1, B32:$src2, CvtMode:$mode), !strconcat("{{ \n\t", ".reg .b8 \t%e2m1x2_out; \n\t", "cvt${mode:base}.satfinite${mode:relu}.e2m1x2.f32 \t%e2m1x2_out, $src1, $src2; \n\t", "cvt.u16.u8 \t$dst, %e2m1x2_out; \n\t", "}}"), []>; - def CVT_f16x2_e2m1x2 : NVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$src, CvtMode:$mode), + def CVT_f16x2_e2m1x2 : NVPTXInst<(outs B32:$dst), + (ins B16:$src, CvtMode:$mode), !strconcat("{{ \n\t", ".reg .b8 \t%e2m1x2_in; \n\t", "cvt.u8.u16 \t%e2m1x2_in, $src; \n\t", @@ -755,13 +732,13 @@ let hasSideEffects = false in { // UE8M0x2 conversions. class CVT_f32_to_ue8m0x2 : - BasicFlagsNVPTXInst<(outs Int16Regs:$dst), - (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode), + BasicFlagsNVPTXInst<(outs B16:$dst), + (ins B32:$src1, B32:$src2), (ins CvtMode:$mode), "cvt${mode:base}" # sat # ".ue8m0x2.f32">; class CVT_bf16x2_to_ue8m0x2 : - BasicFlagsNVPTXInst<(outs Int16Regs:$dst), - (ins Int32Regs:$src), (ins CvtMode:$mode), + BasicFlagsNVPTXInst<(outs B16:$dst), + (ins B32:$src), (ins CvtMode:$mode), "cvt${mode:base}" # sat # ".ue8m0x2.bf16x2">; def CVT_ue8m0x2_f32 : CVT_f32_to_ue8m0x2; @@ -770,8 +747,8 @@ let hasSideEffects = false in { def CVT_ue8m0x2_bf16x2_sf : CVT_bf16x2_to_ue8m0x2<".satfinite">; def CVT_bf16x2_ue8m0x2 : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$src), + BasicNVPTXInst<(outs B32:$dst), + (ins B16:$src), "cvt.rn.bf16x2.ue8m0x2">; } @@ -800,22 +777,22 @@ let hasSideEffects = false in { defvar asm_str = "selp." # TypeStr; def rr : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.RC:$a, t.RC:$b, Int1Regs:$p), + (ins t.RC:$a, t.RC:$b, B1:$p), asm_str, [(set t.Ty:$dst, (select i1:$p, t.Ty:$a, t.Ty:$b))]>; def ri : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.RC:$a, t.Imm:$b, Int1Regs:$p), + (ins t.RC:$a, t.Imm:$b, B1:$p), asm_str, [(set t.Ty:$dst, (select i1:$p, t.Ty:$a, t.ImmNode:$b))]>; def ir : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.Imm:$a, t.RC:$b, Int1Regs:$p), + (ins t.Imm:$a, t.RC:$b, B1:$p), asm_str, [(set t.Ty:$dst, (select i1:$p, t.ImmNode:$a, t.Ty:$b))]>; def ii : BasicNVPTXInst<(outs t.RC:$dst), - (ins t.Imm:$a, t.Imm:$b, Int1Regs:$p), + (ins t.Imm:$a, t.Imm:$b, B1:$p), asm_str, [(set t.Ty:$dst, (select i1:$p, t.ImmNode:$a, t.ImmNode:$b))]>; } @@ -833,7 +810,7 @@ defm SELP_f64 : SELP_PATTERN<"f64", F64RT>; // This does not work as tablegen fails to infer the type of 'imm'. // def v2f16imm : Operand; -// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>; +// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, B32, v2f16imm, imm>; foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { def : Pat<(vt (select i1:$p, vt:$a, vt:$b)), @@ -846,10 +823,10 @@ def : Pat<(vt (select i1:$p, vt:$a, vt:$b)), def fabs_oneuse : OneUse1; -def TESTINF_f32r : BasicNVPTXInst<(outs Int1Regs:$p), (ins Float32Regs:$a), +def TESTINF_f32r : BasicNVPTXInst<(outs B1:$p), (ins B32:$a), "testp.infinite.f32", [(set i1:$p, (seteq (fabs_oneuse f32:$a), fpimm_pos_inf))]>; -def TESTINF_f64r : BasicNVPTXInst<(outs Int1Regs:$p), (ins Float64Regs:$a), +def TESTINF_f64r : BasicNVPTXInst<(outs B1:$p), (ins B64:$a), "testp.infinite.f64", [(set i1:$p, (seteq (fabs_oneuse f64:$a), fpimm_pos_inf))]>; @@ -893,9 +870,9 @@ multiclass ABS { "abs" # SizeName, [(set T:$dst, (abs T:$a))]>; } -defm ABS_16 : ABS; -defm ABS_32 : ABS; -defm ABS_64 : ABS; +defm ABS_16 : ABS; +defm ABS_32 : ABS; +defm ABS_64 : ABS; // Integer min/max. defm SMAX : I3<"max.s", smax, commutative = true>; @@ -913,44 +890,32 @@ def UMIN16x2 : I16x2<"min.u", umin>; // Wide multiplication // def MULWIDES64 : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - "mul.wide.s32">; + BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, B32:$b), "mul.wide.s32">; def MULWIDES64Imm : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - "mul.wide.s32">; + BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i32imm:$b), "mul.wide.s32">; def MULWIDES64Imm64 : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), - "mul.wide.s32">; + BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i64imm:$b), "mul.wide.s32">; def MULWIDEU64 : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - "mul.wide.u32">; + BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, B32:$b), "mul.wide.u32">; def MULWIDEU64Imm : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - "mul.wide.u32">; + BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i32imm:$b), "mul.wide.u32">; def MULWIDEU64Imm64 : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), - "mul.wide.u32">; + BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i64imm:$b), "mul.wide.u32">; def MULWIDES32 : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - "mul.wide.s16">; + BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, B16:$b), "mul.wide.s16">; def MULWIDES32Imm : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - "mul.wide.s16">; + BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i16imm:$b), "mul.wide.s16">; def MULWIDES32Imm32 : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - "mul.wide.s16">; + BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i32imm:$b), "mul.wide.s16">; def MULWIDEU32 : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - "mul.wide.u16">; + BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, B16:$b), "mul.wide.u16">; def MULWIDEU32Imm : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - "mul.wide.u16">; + BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i16imm:$b), "mul.wide.u16">; def MULWIDEU32Imm32 : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - "mul.wide.u16">; + BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i32imm:$b), "mul.wide.u16">; def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>; def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>; @@ -1088,9 +1053,9 @@ multiclass MAD { } let Predicates = [hasOptEnabled] in { -defm MAD16 : MAD<"mad.lo.s16", i16, Int16Regs, i16imm>; -defm MAD32 : MAD<"mad.lo.s32", i32, Int32Regs, i32imm>; -defm MAD64 : MAD<"mad.lo.s64", i64, Int64Regs, i64imm>; +defm MAD16 : MAD<"mad.lo.s16", i16, B16, i16imm>; +defm MAD32 : MAD<"mad.lo.s32", i32, B32, i32imm>; +defm MAD64 : MAD<"mad.lo.s64", i64, B64, i64imm>; } foreach t = [I16RT, I32RT, I64RT] in { @@ -1146,10 +1111,10 @@ class FNEG_F16_F16X2, Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>; -def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>; -def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>; -def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>; -def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>; +def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, B16, doF32FTZ>; +def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, B16, True>; +def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, B32, doF32FTZ>; +def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, B32, True>; // // BF16 NEG @@ -1160,27 +1125,27 @@ class FNEG_BF16_F16X2, Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>; -def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, Int16Regs, doF32FTZ>; -def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, Int16Regs, True>; -def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>; -def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>; +def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, B16, doF32FTZ>; +def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, B16, True>; +def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, B32, doF32FTZ>; +def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, B32, True>; // // F64 division // def FRCP64r : - BasicNVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$b), + BasicNVPTXInst<(outs B64:$dst), + (ins B64:$b), "rcp.rn.f64", [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>; def FDIV64rr : - BasicNVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), + BasicNVPTXInst<(outs B64:$dst), + (ins B64:$a, B64:$b), "div.rn.f64", [(set f64:$dst, (fdiv f64:$a, f64:$b))]>; def FDIV64ri : - BasicNVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), + BasicNVPTXInst<(outs B64:$dst), + (ins B64:$a, f64imm:$b), "div.rn.f64", [(set f64:$dst, (fdiv f64:$a, fpimm:$b))]>; @@ -1200,14 +1165,14 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b), def FRCP32_approx_r_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$b), "rcp.approx.ftz.f32", [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>, Requires<[doF32FTZ]>; def FRCP32_approx_r : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$b), "rcp.approx.f32", [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>; @@ -1215,25 +1180,25 @@ def FRCP32_approx_r : // F32 Approximate division // def FDIV32approxrr_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), "div.approx.ftz.f32", [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>, Requires<[doF32FTZ]>; def FDIV32approxri_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), "div.approx.ftz.f32", [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>, Requires<[doF32FTZ]>; def FDIV32approxrr : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), "div.approx.f32", [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>; def FDIV32approxri : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), "div.approx.f32", [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>; // @@ -1259,25 +1224,25 @@ def : Pat<(fdiv_full f32imm_1, f32:$b), // F32 Semi-accurate division // def FDIV32rr_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), "div.full.ftz.f32", [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>, Requires<[doF32FTZ]>; def FDIV32ri_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), "div.full.ftz.f32", [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>, Requires<[doF32FTZ]>; def FDIV32rr : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), "div.full.f32", [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>; def FDIV32ri : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), "div.full.f32", [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>; // @@ -1290,39 +1255,39 @@ def fdiv_ftz : PatFrag<(ops node:$a, node:$b), }]>; def FRCP32r_prec_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$b), "rcp.rn.ftz.f32", [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>, Requires<[doF32FTZ]>; def FRCP32r_prec : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$b), "rcp.rn.f32", [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>; // // F32 Accurate division // def FDIV32rr_prec_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), "div.rn.ftz.f32", [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>, Requires<[doF32FTZ]>; def FDIV32ri_prec_ftz : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), "div.rn.ftz.f32", [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>, Requires<[doF32FTZ]>; def FDIV32rr_prec : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b), "div.rn.f32", [(set f32:$dst, (fdiv f32:$a, f32:$b))]>; def FDIV32ri_prec : - BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, f32imm:$b), "div.rn.f32", [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>; @@ -1378,10 +1343,10 @@ class UnaryOpAllowsApproxFn return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs(); }]>; -def SINF: BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), +def SINF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "sin.approx.f32", [(set f32:$dst, (UnaryOpAllowsApproxFn f32:$src))]>; -def COSF: BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), +def COSF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "cos.approx.f32", [(set f32:$dst, (UnaryOpAllowsApproxFn f32:$src))]>; @@ -1433,16 +1398,16 @@ foreach vt = [v2i16, v4i8] in { (ANDb32ri $a, imm:$b)>; } -def NOT1 : BasicNVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), +def NOT1 : BasicNVPTXInst<(outs B1:$dst), (ins B1:$src), "not.pred", [(set i1:$dst, (not i1:$src))]>; -def NOT16 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), +def NOT16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$src), "not.b16", [(set i16:$dst, (not i16:$src))]>; -def NOT32 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), +def NOT32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "not.b32", [(set i32:$dst, (not i32:$src))]>; -def NOT64 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), +def NOT64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src), "not.b64", [(set i64:$dst, (not i64:$src))]>; @@ -1453,31 +1418,31 @@ def NOT64 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), // This template also defines a 32-bit shift (imm, imm) instruction. multiclass SHIFT { def i64rr : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B64:$dst), (ins B64:$a, B32:$b), OpcStr # "64", [(set i64:$dst, (OpNode i64:$a, i32:$b))]>; def i64ri : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), + BasicNVPTXInst<(outs B64:$dst), (ins B64:$a, i32imm:$b), OpcStr # "64", [(set i64:$dst, (OpNode i64:$a, (i32 imm:$b)))]>; def i32rr : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), OpcStr # "32", [(set i32:$dst, (OpNode i32:$a, i32:$b))]>; def i32ri : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, i32imm:$b), OpcStr # "32", [(set i32:$dst, (OpNode i32:$a, (i32 imm:$b)))]>; def i32ii : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), + BasicNVPTXInst<(outs B32:$dst), (ins i32imm:$a, i32imm:$b), OpcStr # "32", [(set i32:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>; def i16rr : - BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b), + BasicNVPTXInst<(outs B16:$dst), (ins B16:$a, B32:$b), OpcStr # "16", [(set i16:$dst, (OpNode i16:$a, i32:$b))]>; def i16ri : - BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + BasicNVPTXInst<(outs B16:$dst), (ins B16:$a, i32imm:$b), OpcStr # "16", [(set i16:$dst, (OpNode i16:$a, (i32 imm:$b)))]>; } @@ -1488,11 +1453,11 @@ defm SRL : SHIFT<"shr.u", srl>; // Bit-reverse def BREV32 : - BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), + BasicNVPTXInst<(outs B32:$dst), (ins B32:$a), "brev.b32", [(set i32:$dst, (bitreverse i32:$a))]>; def BREV64 : - BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a), + BasicNVPTXInst<(outs B64:$dst), (ins B64:$a), "brev.b64", [(set i64:$dst, (bitreverse i64:$a))]>; @@ -1525,12 +1490,12 @@ def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>; multiclass BFE { def rrr : BasicNVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, Int32Regs:$c), + (ins RC:$a, B32:$b, B32:$c), Instr, [(set T:$d, (bfe T:$a, i32:$b, i32:$c))]>; def rri : BasicNVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, i32imm:$c), + (ins RC:$a, B32:$b, i32imm:$c), Instr, [(set T:$d, (bfe T:$a, i32:$b, imm:$c))]>; def rii @@ -1543,12 +1508,12 @@ multiclass BFE { multiclass BFI { def rrrr : BasicNVPTXInst<(outs RC:$f), - (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d), + (ins RC:$a, RC:$b, B32:$c, B32:$d), Instr, [(set T:$f, (bfi T:$a, T:$b, i32:$c, i32:$d))]>; def rrri : BasicNVPTXInst<(outs RC:$f), - (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d), + (ins RC:$a, RC:$b, B32:$c, i32imm:$d), Instr, [(set T:$f, (bfi T:$a, T:$b, i32:$c, imm:$d))]>; def rrii @@ -1558,12 +1523,12 @@ multiclass BFI { [(set T:$f, (bfi T:$a, T:$b, imm:$c, imm:$d))]>; def irrr : BasicNVPTXInst<(outs RC:$f), - (ins ImmCls:$a, RC:$b, Int32Regs:$c, Int32Regs:$d), + (ins ImmCls:$a, RC:$b, B32:$c, B32:$d), Instr, [(set T:$f, (bfi (T imm:$a), T:$b, i32:$c, i32:$d))]>; def irri : BasicNVPTXInst<(outs RC:$f), - (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d), + (ins ImmCls:$a, RC:$b, B32:$c, i32imm:$d), Instr, [(set T:$f, (bfi (T imm:$a), T:$b, i32:$c, imm:$d))]>; def irii @@ -1582,35 +1547,35 @@ let hasSideEffects = false in { // the same patterns, so the first one wins. Having unsigned byte extraction // has the benefit of always having zero in unused bits, which makes some // optimizations easier (e.g. no need to mask them). - defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>; - defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>; - defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>; - defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>; + defm BFE_U32 : BFE<"bfe.u32", i32, B32>; + defm BFE_S32 : BFE<"bfe.s32", i32, B32>; + defm BFE_U64 : BFE<"bfe.u64", i64, B64>; + defm BFE_S64 : BFE<"bfe.s64", i64, B64>; - defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>; - defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>; + defm BFI_B32 : BFI<"bfi.b32", i32, B32, i32imm>; + defm BFI_B64 : BFI<"bfi.b64", i64, B64, i64imm>; def PRMT_B32rrr - : BasicFlagsNVPTXInst<(outs Int32Regs:$d), - (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), + : BasicFlagsNVPTXInst<(outs B32:$d), + (ins B32:$a, B32:$b, B32:$c), (ins PrmtMode:$mode), "prmt.b32$mode", [(set i32:$d, (prmt i32:$a, i32:$b, i32:$c, imm:$mode))]>; def PRMT_B32rri - : BasicFlagsNVPTXInst<(outs Int32Regs:$d), - (ins Int32Regs:$a, Int32Regs:$b, Hexu32imm:$c), + : BasicFlagsNVPTXInst<(outs B32:$d), + (ins B32:$a, B32:$b, Hexu32imm:$c), (ins PrmtMode:$mode), "prmt.b32$mode", [(set i32:$d, (prmt i32:$a, i32:$b, imm:$c, imm:$mode))]>; def PRMT_B32rii - : BasicFlagsNVPTXInst<(outs Int32Regs:$d), - (ins Int32Regs:$a, i32imm:$b, Hexu32imm:$c), + : BasicFlagsNVPTXInst<(outs B32:$d), + (ins B32:$a, i32imm:$b, Hexu32imm:$c), (ins PrmtMode:$mode), "prmt.b32$mode", [(set i32:$d, (prmt i32:$a, imm:$b, imm:$c, imm:$mode))]>; def PRMT_B32rir - : BasicFlagsNVPTXInst<(outs Int32Regs:$d), - (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), + : BasicFlagsNVPTXInst<(outs B32:$d), + (ins B32:$a, i32imm:$b, B32:$c), (ins PrmtMode:$mode), "prmt.b32$mode", [(set i32:$d, (prmt i32:$a, imm:$b, i32:$c, imm:$mode))]>; @@ -1663,48 +1628,48 @@ def : Pat<(i16 (sext_inreg (trunc (srl i64:$s, (i32 imm:$o))), i8)), let hasSideEffects = false in { multiclass SETP { def rr : - BasicFlagsNVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b), (ins CmpMode:$cmp), + BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, RC:$b), (ins CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}." # TypeStr>; def ri : - BasicFlagsNVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b), (ins CmpMode:$cmp), + BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, ImmCls:$b), (ins CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}." # TypeStr>; def ir : - BasicFlagsNVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b), (ins CmpMode:$cmp), + BasicFlagsNVPTXInst<(outs B1:$dst), (ins ImmCls:$a, RC:$b), (ins CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}." # TypeStr>; } } -defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; -defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>; -defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>; -defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>; -defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>; -defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>; -defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>; -defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>; -defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>; -defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>; -defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; +defm SETP_b16 : SETP<"b16", B16, i16imm>; +defm SETP_s16 : SETP<"s16", B16, i16imm>; +defm SETP_u16 : SETP<"u16", B16, i16imm>; +defm SETP_b32 : SETP<"b32", B32, i32imm>; +defm SETP_s32 : SETP<"s32", B32, i32imm>; +defm SETP_u32 : SETP<"u32", B32, i32imm>; +defm SETP_b64 : SETP<"b64", B64, i64imm>; +defm SETP_s64 : SETP<"s64", B64, i64imm>; +defm SETP_u64 : SETP<"u64", B64, i64imm>; +defm SETP_f32 : SETP<"f32", B32, f32imm>; +defm SETP_f64 : SETP<"f64", B64, f64imm>; def SETP_f16rr : - BasicFlagsNVPTXInst<(outs Int1Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), (ins CmpMode:$cmp), + BasicFlagsNVPTXInst<(outs B1:$dst), + (ins B16:$a, B16:$b), (ins CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.f16">, Requires<[useFP16Math]>; def SETP_f16x2rr : - BasicFlagsNVPTXInst<(outs Int1Regs:$p, Int1Regs:$q), - (ins Int32Regs:$a, Int32Regs:$b), (ins CmpMode:$cmp), + BasicFlagsNVPTXInst<(outs B1:$p, B1:$q), + (ins B32:$a, B32:$b), (ins CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.f16x2">, Requires<[useFP16Math]>; def SETP_bf16rr : - BasicFlagsNVPTXInst<(outs Int1Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), (ins CmpMode:$cmp), + BasicFlagsNVPTXInst<(outs B1:$dst), + (ins B16:$a, B16:$b), (ins CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.bf16">, Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>; def SETP_bf16x2rr : - BasicFlagsNVPTXInst<(outs Int1Regs:$p, Int1Regs:$q), - (ins Int32Regs:$a, Int32Regs:$b), (ins CmpMode:$cmp), + BasicFlagsNVPTXInst<(outs B1:$p, B1:$q), + (ins B32:$a, B32:$b), (ins CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.bf16x2">, Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>; @@ -1739,18 +1704,18 @@ def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; // Load a memory address into a u32 or u64 register. -def MOV_ADDR : BasicNVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a), +def MOV_ADDR : BasicNVPTXInst<(outs B32:$dst), (ins ADDR_base:$a), "mov.b32", [(set i32:$dst, (Wrapper tglobaladdr:$a))]>; -def MOV_ADDR64 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a), +def MOV_ADDR64 : BasicNVPTXInst<(outs B64:$dst), (ins ADDR_base:$a), "mov.b64", [(set i64:$dst, (Wrapper tglobaladdr:$a))]>; // Get pointer to local stack. let hasSideEffects = false in { - def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), + def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num), "mov.b32 \t$d, __local_depot$num;", []>; - def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), + def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num), "mov.b64 \t$d, __local_depot$num;", []>; } @@ -1769,29 +1734,29 @@ let hasSideEffects = false, isAsCheapAsAMove = true in { [(set VT:$dst, ImmNode:$src)]>; } -def IMOV1r : MOVr; -def MOV16r : MOVr; -def IMOV32r : MOVr; -def IMOV64r : MOVr; -def IMOV128r : MOVr; +def IMOV1r : MOVr; +def MOV16r : MOVr; +def IMOV32r : MOVr; +def IMOV64r : MOVr; +def IMOV128r : MOVr; -def IMOV1i : MOVi; -def IMOV16i : MOVi; -def IMOV32i : MOVi; -def IMOV64i : MOVi; -def FMOV16i : MOVi; -def BFMOV16i : MOVi; -def FMOV32i : MOVi; -def FMOV64i : MOVi; +def IMOV1i : MOVi; +def IMOV16i : MOVi; +def IMOV32i : MOVi; +def IMOV64i : MOVi; +def FMOV16i : MOVi; +def BFMOV16i : MOVi; +def FMOV32i : MOVi; +def FMOV64i : MOVi; def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>; def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>; //---- Copy Frame Index ---- -def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr), +def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr), "add.u32 \t$dst, ${addr:add};", []>; -def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR:$addr), +def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr), "add.u64 \t$dst, ${addr:add};", []>; def to_tframeindex : SDNodeXForm; // comparisons of i8 extracted with BFE as i32 // It's faster to do comparison directly on i32 extracted by BFE, // instead of the long conversion and sign extending. -def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))), +def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), + (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>; -def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))), +def: Pat<(setgt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), + (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>; -def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))), +def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), + (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>; -def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))), +def: Pat<(setge (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), + (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>; -def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))), +def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), + (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>; -def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))), +def: Pat<(setlt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), + (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>; -def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)), - (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))), +def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)), + (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))), (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>; -def: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)), - (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))), +def: Pat<(setle (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)), + (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))), (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>; -def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))), +def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>; -def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))), +def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>; -def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))), +def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>; -def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))), +def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>; -def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))), +def: Pat<(setult (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>; -def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))), +def: Pat<(setult (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>; -def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))), +def: Pat<(setule (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>; -def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))), +def: Pat<(setule (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>; -def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))), +def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>; -def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))), +def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>; -def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))), +def: Pat<(setne (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>; -def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)), - (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))), +def: Pat<(setne (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), + (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))), (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>; // i1 compare -> i32 @@ -2228,56 +2193,56 @@ let isConvergent=1 in { defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>; } -def LoadParamMemI64 : LoadParamMemInst; -def LoadParamMemI32 : LoadParamMemInst; -def LoadParamMemI16 : LoadParamMemInst; -def LoadParamMemI8 : LoadParamMemInst; -def LoadParamMemV2I64 : LoadParamV2MemInst; -def LoadParamMemV2I32 : LoadParamV2MemInst; -def LoadParamMemV2I16 : LoadParamV2MemInst; -def LoadParamMemV2I8 : LoadParamV2MemInst; -def LoadParamMemV4I32 : LoadParamV4MemInst; -def LoadParamMemV4I16 : LoadParamV4MemInst; -def LoadParamMemV4I8 : LoadParamV4MemInst; - -defm StoreParamI64 : StoreParamInst; -defm StoreParamI32 : StoreParamInst; -defm StoreParamI16 : StoreParamInst; -defm StoreParamI8 : StoreParamInst; - -defm StoreParamI8TruncI32 : StoreParamInst; -defm StoreParamI8TruncI64 : StoreParamInst; - -defm StoreParamV2I64 : StoreParamV2Inst; -defm StoreParamV2I32 : StoreParamV2Inst; -defm StoreParamV2I16 : StoreParamV2Inst; -defm StoreParamV2I8 : StoreParamV2Inst; - -defm StoreParamV4I32 : StoreParamV4Inst; -defm StoreParamV4I16 : StoreParamV4Inst; -defm StoreParamV4I8 : StoreParamV4Inst; - -defm StoreParamF32 : StoreParamInst; -defm StoreParamF64 : StoreParamInst; - -defm StoreParamV2F32 : StoreParamV2Inst; -defm StoreParamV2F64 : StoreParamV2Inst; - -defm StoreParamV4F32 : StoreParamV4Inst; - -def StoreRetvalI64 : StoreRetvalInst; -def StoreRetvalI32 : StoreRetvalInst; -def StoreRetvalI16 : StoreRetvalInst; -def StoreRetvalI8 : StoreRetvalInst; -def StoreRetvalI8TruncI32 : StoreRetvalInst; -def StoreRetvalI8TruncI64 : StoreRetvalInst; -def StoreRetvalV2I64 : StoreRetvalV2Inst; -def StoreRetvalV2I32 : StoreRetvalV2Inst; -def StoreRetvalV2I16 : StoreRetvalV2Inst; -def StoreRetvalV2I8 : StoreRetvalV2Inst; -def StoreRetvalV4I32 : StoreRetvalV4Inst; -def StoreRetvalV4I16 : StoreRetvalV4Inst; -def StoreRetvalV4I8 : StoreRetvalV4Inst; +def LoadParamMemI64 : LoadParamMemInst; +def LoadParamMemI32 : LoadParamMemInst; +def LoadParamMemI16 : LoadParamMemInst; +def LoadParamMemI8 : LoadParamMemInst; +def LoadParamMemV2I64 : LoadParamV2MemInst; +def LoadParamMemV2I32 : LoadParamV2MemInst; +def LoadParamMemV2I16 : LoadParamV2MemInst; +def LoadParamMemV2I8 : LoadParamV2MemInst; +def LoadParamMemV4I32 : LoadParamV4MemInst; +def LoadParamMemV4I16 : LoadParamV4MemInst; +def LoadParamMemV4I8 : LoadParamV4MemInst; + +defm StoreParamI64 : StoreParamInst; +defm StoreParamI32 : StoreParamInst; +defm StoreParamI16 : StoreParamInst; +defm StoreParamI8 : StoreParamInst; + +defm StoreParamI8TruncI32 : StoreParamInst; +defm StoreParamI8TruncI64 : StoreParamInst; + +defm StoreParamV2I64 : StoreParamV2Inst; +defm StoreParamV2I32 : StoreParamV2Inst; +defm StoreParamV2I16 : StoreParamV2Inst; +defm StoreParamV2I8 : StoreParamV2Inst; + +defm StoreParamV4I32 : StoreParamV4Inst; +defm StoreParamV4I16 : StoreParamV4Inst; +defm StoreParamV4I8 : StoreParamV4Inst; + +defm StoreParamF32 : StoreParamInst; +defm StoreParamF64 : StoreParamInst; + +defm StoreParamV2F32 : StoreParamV2Inst; +defm StoreParamV2F64 : StoreParamV2Inst; + +defm StoreParamV4F32 : StoreParamV4Inst; + +def StoreRetvalI64 : StoreRetvalInst; +def StoreRetvalI32 : StoreRetvalInst; +def StoreRetvalI16 : StoreRetvalInst; +def StoreRetvalI8 : StoreRetvalInst; +def StoreRetvalI8TruncI32 : StoreRetvalInst; +def StoreRetvalI8TruncI64 : StoreRetvalInst; +def StoreRetvalV2I64 : StoreRetvalV2Inst; +def StoreRetvalV2I32 : StoreRetvalV2Inst; +def StoreRetvalV2I16 : StoreRetvalV2Inst; +def StoreRetvalV2I8 : StoreRetvalV2Inst; +def StoreRetvalV4I32 : StoreRetvalV4Inst; +def StoreRetvalV4I16 : StoreRetvalV4Inst; +def StoreRetvalV4I8 : StoreRetvalV4Inst; def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; @@ -2291,9 +2256,9 @@ def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", [(CallVoid (Wrapper tglobaladdr:$addr))]>; -def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", +def CallVoidInstReg : NVPTXInst<(outs), (ins B32:$addr), "$addr, ", [(CallVoid i32:$addr)]>; -def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ", +def CallVoidInstReg64 : NVPTXInst<(outs), (ins B64:$addr), "$addr, ", [(CallVoid i64:$addr)]>; def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", [(Prototype (i32 imm:$val))]>; @@ -2337,11 +2302,11 @@ class PseudoUseParamInst : "// Pseudo use of $src", [(PseudoUseParam vt:$src)]>; -def PseudoUseParamI64 : PseudoUseParamInst; -def PseudoUseParamI32 : PseudoUseParamInst; -def PseudoUseParamI16 : PseudoUseParamInst; -def PseudoUseParamF64 : PseudoUseParamInst; -def PseudoUseParamF32 : PseudoUseParamInst; +def PseudoUseParamI64 : PseudoUseParamInst; +def PseudoUseParamI32 : PseudoUseParamInst; +def PseudoUseParamI16 : PseudoUseParamInst; +def PseudoUseParamF64 : PseudoUseParamInst; +def PseudoUseParamF32 : PseudoUseParamInst; multiclass ProxyRegInst { def NAME : BasicNVPTXInst<(outs rc:$dst), (ins rc:$src), @@ -2350,10 +2315,10 @@ multiclass ProxyRegInst { def : Pat<(vt (ProxyReg vt:$src)), (!cast(NAME) $src)>; } -defm ProxyRegB1 : ProxyRegInst<"pred", Int1Regs>; -defm ProxyRegB16 : ProxyRegInst<"b16", Int16Regs>; -defm ProxyRegB32 : ProxyRegInst<"b32", Int32Regs>; -defm ProxyRegB64 : ProxyRegInst<"b64", Int64Regs>; +defm ProxyRegB1 : ProxyRegInst<"pred", B1>; +defm ProxyRegB16 : ProxyRegInst<"b16", B16>; +defm ProxyRegB32 : ProxyRegInst<"b32", B32>; +defm ProxyRegB64 : ProxyRegInst<"b64", B64>; // // Load / Store Handling @@ -2367,10 +2332,10 @@ class LD "\t$dst, [$addr];", []>; let mayLoad=1, hasSideEffects=0 in { - def LD_i8 : LD; - def LD_i16 : LD; - def LD_i32 : LD; - def LD_i64 : LD; + def LD_i8 : LD; + def LD_i16 : LD; + def LD_i32 : LD; + def LD_i64 : LD; } class ST @@ -2382,10 +2347,10 @@ class ST " \t[$addr], $src;", []>; let mayStore=1, hasSideEffects=0 in { - def ST_i8 : ST; - def ST_i16 : ST; - def ST_i32 : ST; - def ST_i64 : ST; + def ST_i8 : ST; + def ST_i16 : ST; + def ST_i32 : ST; + def ST_i64 : ST; } // The following is used only in and after vector elementizations. Vector @@ -2415,10 +2380,10 @@ multiclass LD_VEC { "[$addr];", []>; } let mayLoad=1, hasSideEffects=0 in { - defm LDV_i8 : LD_VEC; - defm LDV_i16 : LD_VEC; - defm LDV_i32 : LD_VEC; - defm LDV_i64 : LD_VEC; + defm LDV_i8 : LD_VEC; + defm LDV_i16 : LD_VEC; + defm LDV_i32 : LD_VEC; + defm LDV_i64 : LD_VEC; } multiclass ST_VEC { @@ -2449,15 +2414,15 @@ multiclass ST_VEC { } let mayStore=1, hasSideEffects=0 in { - defm STV_i8 : ST_VEC; - defm STV_i16 : ST_VEC; - defm STV_i32 : ST_VEC; - defm STV_i64 : ST_VEC; + defm STV_i8 : ST_VEC; + defm STV_i16 : ST_VEC; + defm STV_i32 : ST_VEC; + defm STV_i64 : ST_VEC; } //---- Conversion ---- -foreach rc = [Int16Regs, Int32Regs, Int64Regs] in +foreach rc = [B16, B32, B64] in foreach ta = rc.RegTypes in foreach tb = rc.RegTypes in if !ne(ta, tb) then @@ -2638,62 +2603,62 @@ def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 $a)>; let hasSideEffects = false in { // pack a set of smaller int registers to a larger int register - def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2, - Int16Regs:$s3, Int16Regs:$s4), + def V4I16toI64 : NVPTXInst<(outs B64:$d), + (ins B16:$s1, B16:$s2, + B16:$s3, B16:$s4), "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; - def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2), + def V2I16toI32 : NVPTXInst<(outs B32:$d), + (ins B16:$s1, B16:$s2), "mov.b32 \t$d, {{$s1, $s2}};", []>; - def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int32Regs:$s1, Int32Regs:$s2), + def V2I32toI64 : NVPTXInst<(outs B64:$d), + (ins B32:$s1, B32:$s2), "mov.b64 \t$d, {{$s1, $s2}};", []>; - def V2I64toI128 : NVPTXInst<(outs Int128Regs:$d), - (ins Int64Regs:$s1, Int64Regs:$s2), + def V2I64toI128 : NVPTXInst<(outs B128:$d), + (ins B64:$s1, B64:$s2), "mov.b128 \t$d, {{$s1, $s2}};", []>; // unpack a larger int register to a set of smaller int registers - def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, - Int16Regs:$d3, Int16Regs:$d4), - (ins Int64Regs:$s), + def I64toV4I16 : NVPTXInst<(outs B16:$d1, B16:$d2, + B16:$d3, B16:$d4), + (ins B64:$s), "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; - def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), - (ins Int32Regs:$s), + def I32toV2I16 : NVPTXInst<(outs B16:$d1, B16:$d2), + (ins B32:$s), "mov.b32 \t{{$d1, $d2}}, $s;", []>; - def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), - (ins Int64Regs:$s), + def I64toV2I32 : NVPTXInst<(outs B32:$d1, B32:$d2), + (ins B64:$s), "mov.b64 \t{{$d1, $d2}}, $s;", []>; - def I128toV2I64: NVPTXInst<(outs Int64Regs:$d1, Int64Regs:$d2), - (ins Int128Regs:$s), + def I128toV2I64: NVPTXInst<(outs B64:$d1, B64:$d2), + (ins B128:$s), "mov.b128 \t{{$d1, $d2}}, $s;", []>; - def I32toI16H : NVPTXInst<(outs Int16Regs:$high), - (ins Int32Regs:$s), + def I32toI16H : NVPTXInst<(outs B16:$high), + (ins B32:$s), "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}", []>; - def I32toI16L : NVPTXInst<(outs Int16Regs:$low), - (ins Int32Regs:$s), + def I32toI16L : NVPTXInst<(outs B16:$low), + (ins B32:$s), "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}", []>; - def I64toI32H : NVPTXInst<(outs Int32Regs:$high), - (ins Int64Regs:$s), + def I64toI32H : NVPTXInst<(outs B32:$high), + (ins B64:$s), "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}", []>; - def I64toI32L : NVPTXInst<(outs Int32Regs:$low), - (ins Int64Regs:$s), + def I64toI32L : NVPTXInst<(outs B32:$low), + (ins B64:$s), "{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}", []>; // PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the // unused high/low part. let Predicates = [hasPTX<71>] in { - def I32toI16H_Sink : NVPTXInst<(outs Int16Regs:$high), (ins Int32Regs:$s), + def I32toI16H_Sink : NVPTXInst<(outs B16:$high), (ins B32:$s), "mov.b32 \t{{_, $high}}, $s;", []>; - def I32toI16L_Sink : NVPTXInst<(outs Int16Regs:$low), (ins Int32Regs:$s), + def I32toI16L_Sink : NVPTXInst<(outs B16:$low), (ins B32:$s), "mov.b32 \t{{$low, _}}, $s;", []>; - def I64toI32H_Sink : NVPTXInst<(outs Int32Regs:$high), (ins Int64Regs:$s), + def I64toI32H_Sink : NVPTXInst<(outs B32:$high), (ins B64:$s), "mov.b64 \t{{_, $high}}, $s;", []>; - def I64toI32L_Sink : NVPTXInst<(outs Int32Regs:$low), (ins Int64Regs:$s), + def I64toI32L_Sink : NVPTXInst<(outs B32:$low), (ins B64:$s), "mov.b64 \t{{$low, _}}, $s;", []>; } } @@ -2748,16 +2713,16 @@ def fshr_clamp : SDNode<"NVPTXISD::FSHR_CLAMP", SDTIntShiftDOp, []>; let hasSideEffects = false in { multiclass ShfInst { def _i - : BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + : BasicNVPTXInst<(outs B32:$dst), + (ins B32:$lo, B32:$hi, i32imm:$amt), "shf." # mode # ".b32", [(set i32:$dst, (op i32:$hi, i32:$lo, (i32 imm:$amt)))]>, Requires<[hasHWROT32]>; def _r - : BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + : BasicNVPTXInst<(outs B32:$dst), + (ins B32:$lo, B32:$hi, B32:$amt), "shf." # mode # ".b32", [(set i32:$dst, (op i32:$hi, i32:$lo, i32:$amt))]>, @@ -2782,12 +2747,12 @@ def : Pat<(i32 (int_nvvm_fshr_clamp i32:$hi, i32:$lo, (i32 imm:$amt))), let hasSideEffects = false in { foreach RT = [I32RT, I64RT] in { // Count leading zeros - def CLZr # RT.Size : BasicNVPTXInst<(outs Int32Regs:$d), (ins RT.RC:$a), + def CLZr # RT.Size : BasicNVPTXInst<(outs B32:$d), (ins RT.RC:$a), "clz.b" # RT.Size, [(set i32:$d, (ctlz RT.Ty:$a))]>; // Population count - def POPCr # RT.Size : BasicNVPTXInst<(outs Int32Regs:$d), (ins RT.RC:$a), + def POPCr # RT.Size : BasicNVPTXInst<(outs B32:$d), (ins RT.RC:$a), "popc.b" # RT.Size, [(set i32:$d, (ctpop RT.Ty:$a))]>; } @@ -2862,11 +2827,11 @@ let isTerminator=1 in { def Return : BasicNVPTXInst<(outs), (ins), "ret", [(retglue)]>; let isBranch=1 in - def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + def CBranch : NVPTXInst<(outs), (ins B1:$a, brtarget:$target), "@$a bra \t$target;", [(brcond i1:$a, bb:$target)]>; let isBranch=1 in - def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + def CBranchOther : NVPTXInst<(outs), (ins B1:$a, brtarget:$target), "@!$a bra \t$target;", []>; let isBranch=1, isBarrier=1 in @@ -2987,7 +2952,7 @@ let isTerminator = 1, isBranch = 1, isIndirectBranch = 1, isNotDuplicable = 1 in [(brx_item bb:$target)]>; def BRX_END : - NVPTXInst<(outs), (ins brtarget:$target, Int32Regs:$val, i32imm:$id), + NVPTXInst<(outs), (ins brtarget:$target, B32:$val, i32imm:$id), "\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;", [(brx_end bb:$target, i32:$val, (i32 imm:$id))]> { let isBarrier = 1; @@ -2999,8 +2964,8 @@ foreach a_type = ["s", "u"] in { foreach b_type = ["s", "u"] in { def DOT4_ # a_type # b_type : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b, B32:$c), "dp4a." # a_type # "32." # b_type # "32", [(set i32:$dst, (!cast("int_nvvm_idp4a_" # a_type # "_" # b_type) @@ -3011,8 +2976,8 @@ foreach a_type = ["s", "u"] in { defvar lohi_suffix = !if(is_hi, "hi", "lo"); def DOT2_ # lohi_suffix # _ # a_type # b_type : - BasicNVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), + BasicNVPTXInst<(outs B32:$dst), + (ins B32:$a, B32:$b, B32:$c), "dp2a." # lohi_suffix # "." # a_type # "32." # b_type # "32", [(set i32:$dst, (!cast("int_nvvm_idp2a_" # a_type # "_" # b_type) @@ -3124,12 +3089,12 @@ class NVPTXInst_rrr Preds> : BasicNVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), Instruction>, Requires; -def FMARELU_F16 : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_F16_FTZ : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_BF16 : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_F16X2 : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_F16X2_FTZ : NVPTXInst_rrr, hasSM<80>]>; -def FMARELU_BF16X2 : NVPTXInst_rrr, hasSM<80>]>; +def FMARELU_F16 : NVPTXInst_rrr, hasSM<80>]>; +def FMARELU_F16_FTZ : NVPTXInst_rrr, hasSM<80>]>; +def FMARELU_BF16 : NVPTXInst_rrr, hasSM<80>]>; +def FMARELU_F16X2 : NVPTXInst_rrr, hasSM<80>]>; +def FMARELU_F16X2_FTZ : NVPTXInst_rrr, hasSM<80>]>; +def FMARELU_BF16X2 : NVPTXInst_rrr, hasSM<80>]>; // FTZ def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)), diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 5de3dee1fb344..10d7f04d8d937 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -63,14 +63,14 @@ class RegSeq { // Synchronization and shuffle functions //----------------------------------- let isConvergent = true in { -def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), +def INT_BARRIER0_POPC : NVPTXInst<(outs B32:$dst), (ins B32:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", "setp.ne.u32 \t%p1, $pred, 0; \n\t", "bar.red.popc.u32 \t$dst, 0, %p1; \n\t", "}}"), [(set i32:$dst, (int_nvvm_barrier0_popc i32:$pred))]>; -def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), +def INT_BARRIER0_AND : NVPTXInst<(outs B32:$dst), (ins B32:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", ".reg .pred \t%p2; \n\t", @@ -79,7 +79,7 @@ def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), "selp.u32 \t$dst, 1, 0, %p2; \n\t", "}}"), [(set i32:$dst, (int_nvvm_barrier0_and i32:$pred))]>; -def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), +def INT_BARRIER0_OR : NVPTXInst<(outs B32:$dst), (ins B32:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", ".reg .pred \t%p2; \n\t", @@ -92,7 +92,7 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), def INT_BAR_WARP_SYNC_I : BasicNVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync", [(int_nvvm_bar_warp_sync imm:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; -def INT_BAR_WARP_SYNC_R : BasicNVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync", +def INT_BAR_WARP_SYNC_R : BasicNVPTXInst<(outs), (ins B32:$i), "bar.warp.sync", [(int_nvvm_bar_warp_sync i32:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; @@ -101,21 +101,21 @@ multiclass BARRIER1 requires [(intrinsic imm:$i)]>, Requires; - def _r : BasicNVPTXInst<(outs), (ins Int32Regs:$i), asmstr, + def _r : BasicNVPTXInst<(outs), (ins B32:$i), asmstr, [(intrinsic i32:$i)]>, Requires; } multiclass BARRIER2 requires = []> { - def _rr : BasicNVPTXInst<(outs), (ins Int32Regs:$i, Int32Regs:$j), asmstr, + def _rr : BasicNVPTXInst<(outs), (ins B32:$i, B32:$j), asmstr, [(intrinsic i32:$i, i32:$j)]>, Requires; - def _ri : BasicNVPTXInst<(outs), (ins Int32Regs:$i, i32imm:$j), asmstr, + def _ri : BasicNVPTXInst<(outs), (ins B32:$i, i32imm:$j), asmstr, [(intrinsic i32:$i, imm:$j)]>, Requires; - def _ir : BasicNVPTXInst<(outs), (ins i32imm:$i, Int32Regs:$j), asmstr, + def _ir : BasicNVPTXInst<(outs), (ins i32imm:$i, B32:$j), asmstr, [(intrinsic imm:$i, i32:$j)]>, Requires; @@ -170,26 +170,26 @@ foreach sync = [false, true] in { # "_" # regclass # !if(return_pred, "p", "")); defvar InOperandList = !con( - (ins Int32Regs:$src), - !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]), - !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]), + (ins B32:$src), + !dag(ins, !if(offset_imm, [i32imm], [B32]), ["offset"]), + !dag(ins, !if(mask_imm, [i32imm], [B32]), ["mask"]), !if(sync, - !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]), + !dag(ins, !if(threadmask_imm, [i32imm], [B32]), ["threadmask"]), (ins))); defvar Pattern = !con( - (set Int32Regs:$dst), - !if(return_pred, (set Int1Regs:$pred), (set)), + (set B32:$dst), + !if(return_pred, (set B1:$pred), (set)), (set !con( !if(sync, - !dag(Intr, !if(threadmask_imm, [imm], [Int32Regs]), ["threadmask"]), + !dag(Intr, !if(threadmask_imm, [imm], [B32]), ["threadmask"]), (Intr)), - (Intr Int32Regs:$src), - !dag(Intr, !if(offset_imm, [imm], [Int32Regs]), ["offset"]), - !dag(Intr, !if(mask_imm, [imm], [Int32Regs]), ["mask"])))); + (Intr B32:$src), + !dag(Intr, !if(offset_imm, [imm], [B32]), ["offset"]), + !dag(Intr, !if(mask_imm, [imm], [B32]), ["mask"])))); def : BasicNVPTXInst< - !if(return_pred, (outs Int32Regs:$dst, Int1Regs:$pred), - (outs Int32Regs:$dst)), + !if(return_pred, (outs B32:$dst, B1:$pred), + (outs B32:$dst)), InOperandList, "shfl." # !if(sync, "sync.", "") # mode # ".b32", [Pattern]>, @@ -204,107 +204,107 @@ foreach sync = [false, true] in { // vote.{all,any,uni,ballot} multiclass VOTE { - def : BasicNVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred), + def : BasicNVPTXInst<(outs regclass:$dest), (ins B1:$pred), "vote." # mode, [(set regclass:$dest, (IntOp i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; } -defm VOTE_ALL : VOTE; -defm VOTE_ANY : VOTE; -defm VOTE_UNI : VOTE; -defm VOTE_BALLOT : VOTE; +defm VOTE_ALL : VOTE; +defm VOTE_ANY : VOTE; +defm VOTE_UNI : VOTE; +defm VOTE_BALLOT : VOTE; // vote.sync.{all,any,uni,ballot} multiclass VOTE_SYNC { - def i : BasicNVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred, i32imm:$mask), + def i : BasicNVPTXInst<(outs regclass:$dest), (ins B1:$pred, i32imm:$mask), "vote.sync." # mode, [(set regclass:$dest, (IntOp imm:$mask, i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; - def r : BasicNVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred, Int32Regs:$mask), + def r : BasicNVPTXInst<(outs regclass:$dest), (ins B1:$pred, B32:$mask), "vote.sync." # mode, [(set regclass:$dest, (IntOp i32:$mask, i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; } -defm VOTE_SYNC_ALL : VOTE_SYNC; -defm VOTE_SYNC_ANY : VOTE_SYNC; -defm VOTE_SYNC_UNI : VOTE_SYNC; -defm VOTE_SYNC_BALLOT : VOTE_SYNC; +defm VOTE_SYNC_ALL : VOTE_SYNC; +defm VOTE_SYNC_ANY : VOTE_SYNC; +defm VOTE_SYNC_UNI : VOTE_SYNC; +defm VOTE_SYNC_BALLOT : VOTE_SYNC; // elect.sync -def INT_ELECT_SYNC_I : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask), +def INT_ELECT_SYNC_I : BasicNVPTXInst<(outs B32:$dest, B1:$pred), (ins i32imm:$mask), "elect.sync", [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync imm:$mask))]>, Requires<[hasPTX<80>, hasSM<90>]>; -def INT_ELECT_SYNC_R : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask), +def INT_ELECT_SYNC_R : BasicNVPTXInst<(outs B32:$dest, B1:$pred), (ins B32:$mask), "elect.sync", [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync i32:$mask))]>, Requires<[hasPTX<80>, hasSM<90>]>; multiclass MATCH_ANY_SYNC { - def ii : BasicNVPTXInst<(outs Int32Regs:$dest), (ins ImmOp:$value, i32imm:$mask), + def ii : BasicNVPTXInst<(outs B32:$dest), (ins ImmOp:$value, i32imm:$mask), "match.any.sync." # ptxtype, [(set i32:$dest, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; - def ir : BasicNVPTXInst<(outs Int32Regs:$dest), (ins ImmOp:$value, Int32Regs:$mask), + def ir : BasicNVPTXInst<(outs B32:$dest), (ins ImmOp:$value, B32:$mask), "match.any.sync." # ptxtype, [(set i32:$dest, (IntOp i32:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; - def ri : BasicNVPTXInst<(outs Int32Regs:$dest), (ins regclass:$value, i32imm:$mask), + def ri : BasicNVPTXInst<(outs B32:$dest), (ins regclass:$value, i32imm:$mask), "match.any.sync." # ptxtype, [(set i32:$dest, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; - def rr : BasicNVPTXInst<(outs Int32Regs:$dest), (ins regclass:$value, Int32Regs:$mask), + def rr : BasicNVPTXInst<(outs B32:$dest), (ins regclass:$value, B32:$mask), "match.any.sync." # ptxtype, [(set i32:$dest, (IntOp i32:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; } // activemask.b32 -def ACTIVEMASK : BasicNVPTXInst<(outs Int32Regs:$dest), (ins), +def ACTIVEMASK : BasicNVPTXInst<(outs B32:$dest), (ins), "activemask.b32", [(set i32:$dest, (int_nvvm_activemask))]>, Requires<[hasPTX<62>, hasSM<30>]>; -defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC; -defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC; multiclass MATCH_ALLP_SYNC { - def ii : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), + def ii : BasicNVPTXInst<(outs B32:$dest, B1:$pred), (ins ImmOp:$value, i32imm:$mask), "match.all.sync." # ptxtype, [(set i32:$dest, i1:$pred, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; - def ir : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), - (ins ImmOp:$value, Int32Regs:$mask), + def ir : BasicNVPTXInst<(outs B32:$dest, B1:$pred), + (ins ImmOp:$value, B32:$mask), "match.all.sync." # ptxtype, [(set i32:$dest, i1:$pred, (IntOp i32:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; - def ri : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), + def ri : BasicNVPTXInst<(outs B32:$dest, B1:$pred), (ins regclass:$value, i32imm:$mask), "match.all.sync." # ptxtype, [(set i32:$dest, i1:$pred, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; - def rr : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), - (ins regclass:$value, Int32Regs:$mask), + def rr : BasicNVPTXInst<(outs B32:$dest, B1:$pred), + (ins regclass:$value, B32:$mask), "match.all.sync." # ptxtype, [(set i32:$dest, i1:$pred, (IntOp i32:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; } -defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC; -defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC; multiclass REDUX_SYNC { - def : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask), + def : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src, B32:$mask), "redux.sync." # BinOp # "." # PTXType, - [(set i32:$dst, (Intrin i32:$src, Int32Regs:$mask))]>, + [(set i32:$dst, (Intrin i32:$src, B32:$mask))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -320,10 +320,10 @@ defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>; multiclass REDUX_SYNC_F { defvar intr_name = "int_nvvm_redux_sync_f" # BinOp # !subst(".", "_", abs) # !subst(".", "_", NaN); - def : BasicNVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$src, Int32Regs:$mask), + def : BasicNVPTXInst<(outs B32:$dst), + (ins B32:$src, B32:$mask), "redux.sync." # BinOp # abs # NaN # ".f32", - [(set f32:$dst, (!cast(intr_name) f32:$src, Int32Regs:$mask))]>, + [(set f32:$dst, (!cast(intr_name) f32:$src, B32:$mask))]>, Requires<[hasPTX<86>, hasSM100a]>; } @@ -377,7 +377,7 @@ def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_SYS: // fence.proxy.tensormap.acquire variants class FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE : - NVPTXInst<(outs), (ins Int64Regs:$addr), + NVPTXInst<(outs), (ins B64:$addr), "fence.proxy.tensormap::generic.acquire." # Scope # " [$addr], 128;", [(Intr i64:$addr, (i32 128))]>, Requires<[hasPTX<83>, hasSM<90>]>; @@ -422,7 +422,7 @@ multiclass CP_ASYNC_SHARED_GLOBAL_I, hasSM<80>]>; // Variant with src_size parameter - def _s : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$src_size), + def _s : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, B32:$src_size), "cp.async." # cc # ".shared.global" # " [$dst], [$src], " # cpsize # ", $src_size;", [(IntrinS addr:$dst, addr:$src, i32:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -498,14 +498,14 @@ class CpAsyncBulkStr { } multiclass CP_ASYNC_BULK_S2G_INTR { - def "" : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$size, Int64Regs:$ch), + def "" : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, B32:$size, B64:$ch), !if(has_ch, CpAsyncBulkStr<0, 1>.S2G # " [$dst], [$src], $size, $ch;", CpAsyncBulkStr<0, 0>.S2G # " [$dst], [$src], $size;"), [(int_nvvm_cp_async_bulk_shared_cta_to_global addr:$dst, addr:$src, i32:$size, i64:$ch, !if(has_ch, -1, 0))]>, Requires<[hasPTX<80>, hasSM<90>]>; - def _BM : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$size, Int64Regs:$ch, Int16Regs:$mask), + def _BM : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, B32:$size, B64:$ch, B16:$mask), !if(has_ch, CpAsyncBulkStr<0, 1, 1>.S2G # " [$dst], [$src], $size, $ch, $mask;", CpAsyncBulkStr<0, 0, 1>.S2G # " [$dst], [$src], $size, $mask;"), @@ -520,7 +520,7 @@ multiclass CP_ASYNC_BULK_G2S_INTR { def "" : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$mbar, ADDR:$src, - Int32Regs:$size, Int16Regs:$mask, Int64Regs:$ch), + B32:$size, B16:$mask, B64:$ch), !if(has_ch, CpAsyncBulkStr<0, 1>.G2S # " [$dst], [$src], $size, [$mbar], $ch;", CpAsyncBulkStr<0, 0>.G2S # " [$dst], [$src], $size, [$mbar];"), @@ -529,7 +529,7 @@ multiclass CP_ASYNC_BULK_G2S_INTR { def _MC : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$mbar, ADDR:$src, - Int32Regs:$size, Int16Regs:$mask, Int64Regs:$ch), + B32:$size, B16:$mask, B64:$ch), !if(has_ch, CpAsyncBulkStr<1, 1>.G2S # " [$dst], [$src], $size, [$mbar], $mask, $ch;", CpAsyncBulkStr<1, 0>.G2S # " [$dst], [$src], $size, [$mbar], $mask;"), @@ -540,13 +540,13 @@ defm CP_ASYNC_BULK_G2S : CP_ASYNC_BULK_G2S_INTR; defm CP_ASYNC_BULK_G2S_CH : CP_ASYNC_BULK_G2S_INTR; def CP_ASYNC_BULK_CTA_TO_CLUSTER : NVPTXInst<(outs), - (ins ADDR:$dst, ADDR:$mbar, ADDR:$src, Int32Regs:$size), + (ins ADDR:$dst, ADDR:$mbar, ADDR:$src, B32:$size), CpAsyncBulkStr<0, 0>.C2C # " [$dst], [$src], $size, [$mbar];", [(int_nvvm_cp_async_bulk_shared_cta_to_cluster addr:$dst, addr:$mbar, addr:$src, i32:$size)]>, Requires<[hasPTX<80>, hasSM<90>]>; multiclass CP_ASYNC_BULK_PREFETCH_INTR { - def "" : NVPTXInst<(outs), (ins ADDR:$src, Int32Regs:$size, Int64Regs:$ch), + def "" : NVPTXInst<(outs), (ins ADDR:$src, B32:$size, B64:$ch), !if(has_ch, "cp.async.bulk.prefetch.L2.global.L2::cache_hint" # " [$src], $size, $ch;", "cp.async.bulk.prefetch.L2.global" # " [$src], $size;"), @@ -583,14 +583,14 @@ def CTAGroupFlags : Operand { } multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR { - defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); defvar asm_str_default = "$cg [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]"; - defvar rc = !if(is_shared32, Int32Regs, Int64Regs); + defvar rc = !if(is_shared32, B32, B64); defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0); defvar im2col_dag = !if(!eq(mode, "im2col"), - !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), + !dag(ins, !listsplat(B16, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), (ins)); defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", "); defvar im2col_asm_str = ", {{" # im2col_str # "}}"; @@ -599,22 +599,22 @@ multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR !strconcat(asm_str_default, im2col_asm_str), asm_str_default); def "" : NVPTXInst<(outs), - !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins CTAGroupFlags:$cg)), + !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, (ins CTAGroupFlags:$cg)), !strconcat(G2S_STRINGS.inst_name, asm_str, ";"), []>, Requires<[hasPTX<80>, hasSM<90>]>; def _MC : NVPTXInst<(outs), - !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, - (ins Int16Regs:$mc, CTAGroupFlags:$cg)), + !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, + (ins B16:$mc, CTAGroupFlags:$cg)), !strconcat(G2S_STRINGS.inst_name, asm_str, ", $mc;"), []>, Requires<[hasPTX<80>, hasSM<90>]>; def _CH : NVPTXInst<(outs), - !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, - (ins Int64Regs:$ch, CTAGroupFlags:$cg)), + !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, + (ins B64:$ch, CTAGroupFlags:$cg)), !strconcat(G2S_STRINGS.inst_name, asm_str, ", $ch;"), []>, Requires<[hasPTX<80>, hasSM<90>]>; def _MC_CH : NVPTXInst<(outs), - !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, - (ins Int16Regs:$mc, Int64Regs:$ch, CTAGroupFlags:$cg)), + !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, + (ins B16:$mc, B64:$ch, CTAGroupFlags:$cg)), !strconcat(G2S_STRINGS.inst_name, asm_str, ", $mc, $ch;"), []>, Requires<[hasPTX<80>, hasSM<90>]>; } @@ -648,17 +648,17 @@ class S2G_STRINGS { - defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]"; - defvar rc = !if(shared32, Int32Regs, Int64Regs); + defvar rc = !if(shared32, B32, B64); def "" : NVPTXInst<(outs), - !con((ins rc:$src, Int64Regs:$tmap), dims_dag), + !con((ins rc:$src, B64:$tmap), dims_dag), !strconcat(S2G_STRINGS.inst_name, asm_str, ";"), []>, Requires<[hasPTX<80>, hasSM<90>]>; def _CH : NVPTXInst<(outs), - !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch)), + !con((ins rc:$src, B64:$tmap), dims_dag, (ins B64:$ch)), !strconcat(S2G_STRINGS.inst_name, asm_str, ", $ch;"), []>, Requires<[hasPTX<80>, hasSM<90>]>; } @@ -669,20 +669,20 @@ def TMAReductionFlags : Operand { // TMA Copy from Shared to Global memory with Reduction multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR { - defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]"; - defvar rc = !if(shared32, Int32Regs, Int64Regs); + defvar rc = !if(shared32, B32, B64); defvar prefix = "cp.reduce.async.bulk.tensor" # "." # dim # "d" # ".global.shared::cta"; defvar suffix = "." # mode # ".bulk_group"; def "" : NVPTXInst<(outs), - !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)), + !con((ins rc:$src, B64:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)), !strconcat(prefix, "${red_op}", suffix, asm_str, ";"), []>, Requires<[hasPTX<80>, hasSM<90>]>; def _CH : NVPTXInst<(outs), - !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch, TMAReductionFlags:$red_op)), + !con((ins rc:$src, B64:$tmap), dims_dag, (ins B64:$ch, TMAReductionFlags:$red_op)), !strconcat(prefix, "${red_op}", suffix, ".L2::cache_hint", asm_str, ", $ch;"), []>, Requires<[hasPTX<80>, hasSM<90>]>; } @@ -713,13 +713,13 @@ class PREFETCH_STRINGS { } multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR { - defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); defvar asm_str_default = " [$tmap, {{" # dims_str # "}}]"; defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0); defvar im2col_dag = !if(!eq(mode, "im2col"), - !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), + !dag(ins, !listsplat(B16, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), (ins)); defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", "); defvar im2col_asm_str = ", {{" # im2col_str # "}}"; @@ -728,11 +728,11 @@ multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR { !strconcat(asm_str_default, im2col_asm_str), asm_str_default); def "" : NVPTXInst<(outs), - !con((ins Int64Regs:$tmap), dims_dag, im2col_dag), + !con((ins B64:$tmap), dims_dag, im2col_dag), !strconcat(PREFETCH_STRINGS.inst_name, asm_str, ";"), []>, Requires<[hasPTX<80>, hasSM<90>]>; def _CH : NVPTXInst<(outs), - !con((ins Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)), + !con((ins B64:$tmap), dims_dag, im2col_dag, (ins B64:$ch)), !strconcat(PREFETCH_STRINGS.inst_name, asm_str, ", $ch;"), []>, Requires<[hasPTX<80>, hasSM<90>]>; } @@ -776,7 +776,7 @@ def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">; //Applypriority intrinsics class APPLYPRIORITY_L2_INTRS : - BasicNVPTXInst<(outs), (ins ADDR:$addr, Int64Regs:$size), + BasicNVPTXInst<(outs), (ins ADDR:$addr, B64:$size), StrJoin<".", ["applypriority", addrspace , "L2::evict_normal"]>.ret, [(!cast(StrJoin<"_", ["int_nvvm_applypriority", addrspace , "L2_evict_normal"]>.ret) addr:$addr, i64:$size)]>, @@ -804,7 +804,7 @@ def DISCARD_GLOBAL_L2 : DISCARD_L2_INTRS<"global">; //----------------------------------- multiclass MBARRIER_INIT { - def "" : BasicNVPTXInst<(outs), (ins ADDR:$addr, Int32Regs:$count), + def "" : BasicNVPTXInst<(outs), (ins ADDR:$addr, B32:$count), "mbarrier.init" # AddrSpace # ".b64", [(Intrin addr:$addr, i32:$count)]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -826,7 +826,7 @@ defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>; multiclass MBARRIER_ARRIVE { - def "" : BasicNVPTXInst<(outs Int64Regs:$state), (ins ADDR:$addr), + def "" : BasicNVPTXInst<(outs B64:$state), (ins ADDR:$addr), "mbarrier.arrive" # AddrSpace # ".b64", [(set i64:$state, (Intrin addr:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -837,8 +837,8 @@ defm MBARRIER_ARRIVE_SHARED : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>; multiclass MBARRIER_ARRIVE_NOCOMPLETE { - def "" : BasicNVPTXInst<(outs Int64Regs:$state), - (ins ADDR:$addr, Int32Regs:$count), + def "" : BasicNVPTXInst<(outs B64:$state), + (ins ADDR:$addr, B32:$count), "mbarrier.arrive.noComplete" # AddrSpace # ".b64", [(set i64:$state, (Intrin addr:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -850,7 +850,7 @@ defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED : MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>; multiclass MBARRIER_ARRIVE_DROP { - def "" : BasicNVPTXInst<(outs Int64Regs:$state), (ins ADDR:$addr), + def "" : BasicNVPTXInst<(outs B64:$state), (ins ADDR:$addr), "mbarrier.arrive_drop" # AddrSpace # ".b64", [(set i64:$state, (Intrin addr:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -862,8 +862,8 @@ defm MBARRIER_ARRIVE_DROP_SHARED : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>; multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE { - def "" : BasicNVPTXInst<(outs Int64Regs:$state), - (ins ADDR:$addr, Int32Regs:$count), + def "" : BasicNVPTXInst<(outs B64:$state), + (ins ADDR:$addr, B32:$count), "mbarrier.arrive_drop.noComplete" # AddrSpace # ".b64", [(set i64:$state, (Intrin addr:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -876,7 +876,7 @@ defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED : int_nvvm_mbarrier_arrive_drop_noComplete_shared>; multiclass MBARRIER_TEST_WAIT { - def "" : BasicNVPTXInst<(outs Int1Regs:$res), (ins ADDR:$addr, Int64Regs:$state), + def "" : BasicNVPTXInst<(outs B1:$res), (ins ADDR:$addr, B64:$state), "mbarrier.test_wait" # AddrSpace # ".b64", [(set i1:$res, (Intrin addr:$addr, i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -888,7 +888,7 @@ defm MBARRIER_TEST_WAIT_SHARED : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>; class MBARRIER_PENDING_COUNT : - BasicNVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state), + BasicNVPTXInst<(outs B32:$res), (ins B64:$state), "mbarrier.pending_count.b64", [(set i32:$res, (Intrin i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; @@ -989,7 +989,7 @@ def : PRMT2Pat; def INT_NVVM_NANOSLEEP_I : BasicNVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32", [(int_nvvm_nanosleep imm:$i)]>, Requires<[hasPTX<63>, hasSM<70>]>; -def INT_NVVM_NANOSLEEP_R : BasicNVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32", +def INT_NVVM_NANOSLEEP_R : BasicNVPTXInst<(outs), (ins B32:$i), "nanosleep.u32", [(int_nvvm_nanosleep i32:$i)]>, Requires<[hasPTX<63>, hasSM<70>]>; @@ -1006,64 +1006,46 @@ def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs), // Min Max // -def INT_NVVM_FMIN_F : F_MATH_2<"min.f32", Float32Regs, - Float32Regs, Float32Regs, int_nvvm_fmin_f>; -def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; -def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f, +def INT_NVVM_FMIN_F : F_MATH_2<"min.f32", B32, B32, B32, int_nvvm_fmin_f>; +def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32", B32, B32, B32, int_nvvm_fmin_ftz_f>; +def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32", B32, B32, B32, int_nvvm_fmin_nan_f, [hasPTX<70>, hasSM<80>]>; -def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f, +def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_f, [hasPTX<70>, hasSM<80>]>; def INT_NVVM_FMIN_XORSIGN_ABS_F : - F_MATH_2<"min.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f, + F_MATH_2<"min.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F : - F_MATH_2<"min.ftz.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f, + F_MATH_2<"min.ftz.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_ftz_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F : - F_MATH_2<"min.NaN.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f, + F_MATH_2<"min.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_nan_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F : - F_MATH_2<"min.ftz.NaN.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f, + F_MATH_2<"min.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; -def INT_NVVM_FMAX_F : F_MATH_2<"max.f32", Float32Regs, - Float32Regs, Float32Regs, int_nvvm_fmax_f>; -def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; -def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f, +def INT_NVVM_FMAX_F : F_MATH_2<"max.f32", B32, B32, B32, int_nvvm_fmax_f>; +def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32", B32, B32, B32, int_nvvm_fmax_ftz_f>; +def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32", B32, B32, B32, int_nvvm_fmax_nan_f, [hasPTX<70>, hasSM<80>]>; -def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f, +def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_f, [hasPTX<70>, hasSM<80>]>; def INT_NVVM_FMAX_XORSIGN_ABS_F : - F_MATH_2<"max.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f, + F_MATH_2<"max.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F : - F_MATH_2<"max.ftz.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f, + F_MATH_2<"max.ftz.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_ftz_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F : - F_MATH_2<"max.NaN.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f, + F_MATH_2<"max.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_nan_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F : - F_MATH_2<"max.ftz.NaN.xorsign.abs.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f, + F_MATH_2<"max.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_xorsign_abs_f, [hasPTX<72>, hasSM<86>]>; -def INT_NVVM_FMIN_D : F_MATH_2<"min.f64", Float64Regs, - Float64Regs, Float64Regs, int_nvvm_fmin_d>; -def INT_NVVM_FMAX_D : F_MATH_2<"max.f64", Float64Regs, - Float64Regs, Float64Regs, int_nvvm_fmax_d>; +def INT_NVVM_FMIN_D : F_MATH_2<"min.f64", B64, B64, B64, int_nvvm_fmin_d>; +def INT_NVVM_FMAX_D : F_MATH_2<"max.f64", B64, B64, B64, int_nvvm_fmax_d>; // // Min Max f16, f16x2, bf16, bf16x2 @@ -1080,67 +1062,67 @@ class MIN_MAX_TUPLE { foreach P = [ MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16, - int_nvvm_fmax_f16), Int16Regs>, + int_nvvm_fmax_f16), B16>, MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, - int_nvvm_fmax_ftz_f16), Int16Regs>, + int_nvvm_fmax_ftz_f16), B16>, MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, - int_nvvm_fmax_nan_f16), Int16Regs>, + int_nvvm_fmax_nan_f16), B16>, MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>, + int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), B16>, MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16), - Int16Regs, [hasPTX<72>, hasSM<86>]>, + B16, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16), - Int16Regs, [hasPTX<72>, hasSM<86>]>, + B16, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16), - Int16Regs, [hasPTX<72>, hasSM<86>]>, + B16, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_nan_xorsign_abs_f16, - int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>, + int_nvvm_fmax_ftz_nan_xorsign_abs_f16), B16, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, - int_nvvm_fmax_f16x2), Int32Regs>, + int_nvvm_fmax_f16x2), B32>, MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>, + int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), B32>, MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>, + int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), B32>, MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>, + int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), B32>, MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2), - Int32Regs, [hasPTX<72>, hasSM<86>]>, + B32, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2), - Int32Regs, [hasPTX<72>, hasSM<86>]>, + B32, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2), - Int32Regs, [hasPTX<72>, hasSM<86>]>, + B32, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2, int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2), - Int32Regs, [hasPTX<72>, hasSM<86>]>, + B32, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"), - int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>, + int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), B16>, MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, - int_nvvm_fmax_nan_bf16), Int16Regs>, + int_nvvm_fmax_nan_bf16), B16>, MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16), - Int16Regs, [hasPTX<72>, hasSM<86>]>, + B16, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16), - Int16Regs, [hasPTX<72>, hasSM<86>]>, + B16, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2, - int_nvvm_fmax_bf16x2), Int32Regs>, + int_nvvm_fmax_bf16x2), B32>, MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>, + int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), B32>, MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2), - Int32Regs, [hasPTX<72>, hasSM<86>]>, + B32, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_bf16x2, int_nvvm_fmax_nan_xorsign_abs_bf16x2), - Int32Regs, [hasPTX<72>, hasSM<86>]>] in { + B32, [hasPTX<72>, hasSM<86>]>] in { def P.Variant : F_MATH_2; @@ -1154,85 +1136,50 @@ defm INT_NVVM_FMAN : MIN_MAX<"max">; // Multiplication // -def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16", Int16Regs, - Int16Regs, Int16Regs, int_nvvm_mulhi_s>; -def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16", Int16Regs, - Int16Regs, Int16Regs, int_nvvm_mulhi_us>; -def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32", Int32Regs, - Int32Regs, Int32Regs, int_nvvm_mulhi_i>; -def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32", Int32Regs, - Int32Regs, Int32Regs, int_nvvm_mulhi_ui>; -def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64", Int64Regs, - Int64Regs, Int64Regs, int_nvvm_mulhi_ll>; -def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64", Int64Regs, - Int64Regs, Int64Regs, int_nvvm_mulhi_ull>; - -def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>; -def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>; -def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>; -def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>; -def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>; -def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>; -def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>; -def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>; - -def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>; -def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>; -def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>; -def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>; - -def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32", - Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>; -def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32", - Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>; +def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16", B16, B16, B16, int_nvvm_mulhi_s>; +def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16", B16, B16, B16, int_nvvm_mulhi_us>; +def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32", B32, B32, B32, int_nvvm_mulhi_i>; +def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32", B32, B32, B32, int_nvvm_mulhi_ui>; +def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64", B64, B64, B64, int_nvvm_mulhi_ll>; +def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64", B64, B64, B64, int_nvvm_mulhi_ull>; + +def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32", B32, B32, B32, int_nvvm_mul_rn_ftz_f>; +def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32", B32, B32, B32, int_nvvm_mul_rn_f>; +def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32", B32, B32, B32, int_nvvm_mul_rz_ftz_f>; +def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32", B32, B32, B32, int_nvvm_mul_rz_f>; +def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32", B32, B32, B32, int_nvvm_mul_rm_ftz_f>; +def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32", B32, B32, B32, int_nvvm_mul_rm_f>; +def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32", B32, B32, B32, int_nvvm_mul_rp_ftz_f>; +def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32", B32, B32, B32, int_nvvm_mul_rp_f>; + +def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64", B64, B64, B64, int_nvvm_mul_rn_d>; +def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64", B64, B64, B64, int_nvvm_mul_rz_d>; +def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64", B64, B64, B64, int_nvvm_mul_rm_d>; +def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64", B64, B64, B64, int_nvvm_mul_rp_d>; + +def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32", B32, B32, B32, int_nvvm_mul24_i>; +def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32", B32, B32, B32, int_nvvm_mul24_ui>; // // Div // -def INT_NVVM_DIV_APPROX_FTZ_F - : F_MATH_2<"div.approx.ftz.f32", Float32Regs, - Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>; -def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>; - -def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>; -def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>; -def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>; -def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>; -def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>; -def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>; -def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>; -def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>; - -def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>; -def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>; -def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>; -def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; +def INT_NVVM_DIV_APPROX_FTZ_F : F_MATH_2<"div.approx.ftz.f32", B32, B32, B32, int_nvvm_div_approx_ftz_f>; +def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32", B32, B32, B32, int_nvvm_div_approx_f>; + +def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32", B32, B32, B32, int_nvvm_div_rn_ftz_f>; +def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32", B32, B32, B32, int_nvvm_div_rn_f>; +def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32", B32, B32, B32, int_nvvm_div_rz_ftz_f>; +def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32", B32, B32, B32, int_nvvm_div_rz_f>; +def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32", B32, B32, B32, int_nvvm_div_rm_ftz_f>; +def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32", B32, B32, B32, int_nvvm_div_rm_f>; +def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32", B32, B32, B32, int_nvvm_div_rp_ftz_f>; +def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32", B32, B32, B32, int_nvvm_div_rp_f>; + +def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64", B64, B64, B64, int_nvvm_div_rn_d>; +def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64", B64, B64, B64, int_nvvm_div_rz_d>; +def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64", B64, B64, B64, int_nvvm_div_rm_d>; +def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64", B64, B64, B64, int_nvvm_div_rp_d>; def : Pat<(int_nvvm_div_full f32:$a, f32:$b), (FDIV32rr $a, $b)>; @@ -1250,18 +1197,12 @@ def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b), // Sad // -def INT_NVVM_SAD_S : F_MATH_3<"sad.s16", - Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>; -def INT_NVVM_SAD_US : F_MATH_3<"sad.u16", - Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>; -def INT_NVVM_SAD_I : F_MATH_3<"sad.s32", - Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>; -def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32", - Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>; -def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64", - Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>; -def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64", - Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>; +def INT_NVVM_SAD_S : F_MATH_3<"sad.s16", B16, B16, B16, B16, int_nvvm_sad_s>; +def INT_NVVM_SAD_US : F_MATH_3<"sad.u16", B16, B16, B16, B16, int_nvvm_sad_us>; +def INT_NVVM_SAD_I : F_MATH_3<"sad.s32", B32, B32, B32, B32, int_nvvm_sad_i>; +def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32", B32, B32, B32, B32, int_nvvm_sad_ui>; +def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64", B64, B64, B64, B64, int_nvvm_sad_ll>; +def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64", B64, B64, B64, B64, int_nvvm_sad_ull>; // // Floor Ceil @@ -1301,12 +1242,12 @@ defm ABS_F64 : F_ABS<"f64", F64RT, support_ftz = false>; def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>; def COPYSIGN_F : - BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1), + BasicNVPTXInst<(outs B32:$dst), (ins B32:$src0, B32:$src1), "copysign.f32", [(set f32:$dst, (fcopysign_nvptx f32:$src1, f32:$src0))]>; def COPYSIGN_D : - BasicNVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1), + BasicNVPTXInst<(outs B64:$dst), (ins B64:$src0, B64:$src1), "copysign.f64", [(set f64:$dst, (fcopysign_nvptx f64:$src1, f64:$src0))]>; @@ -1404,59 +1345,59 @@ class FMA_TUPLE, - FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>, - FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>, - FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>, - - FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>, - FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>, - FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>, - FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>, - FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>, - FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>, - FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>, - FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>, - - FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs, + FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, B64>, + FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, B64>, + FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, B64>, + FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, B64>, + + FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, B32>, + FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, B32>, + FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, B32>, + FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, B32>, + FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, B32>, + FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, B32>, + FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, B32>, + FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, B32>, + + FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, B16, [hasPTX<42>, hasSM<53>]>, + FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, B16, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs, + FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, B16, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs, + FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, B16, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs, + FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, B16, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs, + FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, B16, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs, + FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, B16, [hasPTX<70>, hasSM<80>]>, + FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, B16, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs, + FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, B16, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs, + FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, B16, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs, + FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, B16, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs, + FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, B16, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs, + FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, B32, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs, + FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, B32, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs, + FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, B32, [hasPTX<42>, hasSM<53>]>, FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, - Int32Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs, + B32, [hasPTX<42>, hasSM<53>]>, + FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, B32, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, - Int32Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs, + B32, [hasPTX<70>, hasSM<80>]>, + FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, B32, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs, + FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, B32, [hasPTX<70>, hasSM<80>]> ] in { def P.Variant : @@ -1566,31 +1507,19 @@ def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)), // Add // -def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>; -def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>; -def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>; -def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>; -def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>; -def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>; -def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>; -def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32", - Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>; - -def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>; -def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>; -def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>; -def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64", - Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>; +def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32", B32, B32, B32, int_nvvm_add_rn_ftz_f>; +def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32", B32, B32, B32, int_nvvm_add_rn_f>; +def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32", B32, B32, B32, int_nvvm_add_rz_ftz_f>; +def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32", B32, B32, B32, int_nvvm_add_rz_f>; +def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32", B32, B32, B32, int_nvvm_add_rm_ftz_f>; +def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32", B32, B32, B32, int_nvvm_add_rm_f>; +def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32", B32, B32, B32, int_nvvm_add_rp_ftz_f>; +def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32", B32, B32, B32, int_nvvm_add_rp_f>; + +def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64", B64, B64, B64, int_nvvm_add_rn_d>; +def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64", B64, B64, B64, int_nvvm_add_rz_d>; +def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64", B64, B64, B64, int_nvvm_add_rm_d>; +def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64", B64, B64, B64, int_nvvm_add_rp_d>; // // BFIND @@ -1600,12 +1529,12 @@ foreach t = [I32RT, I64RT] in { foreach sign = ["s", "u"] in { defvar flo_intrin = !cast("int_nvvm_flo_" # sign); def BFIND_ # sign # t.Size - : BasicNVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), + : BasicNVPTXInst<(outs B32:$dst), (ins t.RC:$src), "bfind." # sign # t.Size, [(set i32:$dst, (flo_intrin t.Ty:$src, 0))]>; def BFIND_SHIFTAMT_ # sign # t.Size - : BasicNVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), + : BasicNVPTXInst<(outs B32:$dst), (ins t.RC:$src), "bfind.shiftamt." # sign # t.Size, [(set i32:$dst, (flo_intrin t.Ty:$src, -1))]>; } @@ -1856,24 +1785,24 @@ let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in { // class INT_FNS_MBO - : BasicNVPTXInst<(outs Int32Regs:$dst), ins, + : BasicNVPTXInst<(outs B32:$dst), ins, "fns.b32", [(set i32:$dst, Operands)]>, Requires<[hasPTX<60>, hasSM<30>]>; -def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), +def INT_FNS_rrr : INT_FNS_MBO<(ins B32:$mask, B32:$base, B32:$offset), (int_nvvm_fns i32:$mask, i32:$base, i32:$offset)>; -def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), +def INT_FNS_rri : INT_FNS_MBO<(ins B32:$mask, B32:$base, i32imm:$offset), (int_nvvm_fns i32:$mask, i32:$base, imm:$offset)>; -def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), +def INT_FNS_rir : INT_FNS_MBO<(ins B32:$mask, i32imm:$base, B32:$offset), (int_nvvm_fns i32:$mask, imm:$base, i32:$offset)>; -def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), +def INT_FNS_rii : INT_FNS_MBO<(ins B32:$mask, i32imm:$base, i32imm:$offset), (int_nvvm_fns i32:$mask, imm:$base, imm:$offset)>; -def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), +def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, B32:$base, B32:$offset), (int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>; -def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset), +def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, B32:$base, i32imm:$offset), (int_nvvm_fns imm:$mask, i32:$base, imm:$offset)>; -def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), +def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, B32:$offset), (int_nvvm_fns imm:$mask, imm:$base, i32:$offset)>; def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset), (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>; @@ -2145,10 +2074,10 @@ class LDU_G : NVPTXInst<(outs regclass:$result), (ins ADDR:$src), "ldu.global." # TyStr # " \t$result, [$src];", []>; -def LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>; -def LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>; -def LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>; -def LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>; +def LDU_GLOBAL_i8 : LDU_G<"b8", B16>; +def LDU_GLOBAL_i16 : LDU_G<"b16", B16>; +def LDU_GLOBAL_i32 : LDU_G<"b32", B32>; +def LDU_GLOBAL_i64 : LDU_G<"b64", B64>; // vector @@ -2165,14 +2094,14 @@ class VLDU_G_ELE_V4 "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>; -def LDU_GLOBAL_v2i8 : VLDU_G_ELE_V2<"b8", Int16Regs>; -def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2<"b16", Int16Regs>; -def LDU_GLOBAL_v2i32 : VLDU_G_ELE_V2<"b32", Int32Regs>; -def LDU_GLOBAL_v2i64 : VLDU_G_ELE_V2<"b64", Int64Regs>; +def LDU_GLOBAL_v2i8 : VLDU_G_ELE_V2<"b8", B16>; +def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2<"b16", B16>; +def LDU_GLOBAL_v2i32 : VLDU_G_ELE_V2<"b32", B32>; +def LDU_GLOBAL_v2i64 : VLDU_G_ELE_V2<"b64", B64>; -def LDU_GLOBAL_v4i8 : VLDU_G_ELE_V4<"b8", Int16Regs>; -def LDU_GLOBAL_v4i16 : VLDU_G_ELE_V4<"b16", Int16Regs>; -def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4<"b32", Int32Regs>; +def LDU_GLOBAL_v4i8 : VLDU_G_ELE_V4<"b8", B16>; +def LDU_GLOBAL_v4i16 : VLDU_G_ELE_V4<"b16", B16>; +def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4<"b32", B32>; //----------------------------------- @@ -2187,10 +2116,10 @@ class LDG_G : NVPTXInst<(outs regclass:$result), (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src), "ld.global.nc.${Sign:sign}$fromWidth \t$result, [$src];", []>; -def LD_GLOBAL_NC_i8 : LDG_G; -def LD_GLOBAL_NC_i16 : LDG_G; -def LD_GLOBAL_NC_i32 : LDG_G; -def LD_GLOBAL_NC_i64 : LDG_G; +def LD_GLOBAL_NC_i8 : LDG_G; +def LD_GLOBAL_NC_i16 : LDG_G; +def LD_GLOBAL_NC_i32 : LDG_G; +def LD_GLOBAL_NC_i64 : LDG_G; // vector @@ -2213,33 +2142,33 @@ class VLDG_G_ELE_V8 : "ld.global.nc.v8.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>; // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. -def LD_GLOBAL_NC_v2i8 : VLDG_G_ELE_V2; -def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2; -def LD_GLOBAL_NC_v2i32 : VLDG_G_ELE_V2; -def LD_GLOBAL_NC_v2i64 : VLDG_G_ELE_V2; +def LD_GLOBAL_NC_v2i8 : VLDG_G_ELE_V2; +def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2; +def LD_GLOBAL_NC_v2i32 : VLDG_G_ELE_V2; +def LD_GLOBAL_NC_v2i64 : VLDG_G_ELE_V2; -def LD_GLOBAL_NC_v4i8 : VLDG_G_ELE_V4; -def LD_GLOBAL_NC_v4i16 : VLDG_G_ELE_V4; -def LD_GLOBAL_NC_v4i32 : VLDG_G_ELE_V4; +def LD_GLOBAL_NC_v4i8 : VLDG_G_ELE_V4; +def LD_GLOBAL_NC_v4i16 : VLDG_G_ELE_V4; +def LD_GLOBAL_NC_v4i32 : VLDG_G_ELE_V4; -def LD_GLOBAL_NC_v4i64 : VLDG_G_ELE_V4; -def LD_GLOBAL_NC_v8i32 : VLDG_G_ELE_V8; +def LD_GLOBAL_NC_v4i64 : VLDG_G_ELE_V4; +def LD_GLOBAL_NC_v8i32 : VLDG_G_ELE_V8; multiclass NG_TO_G Preds = []> { if Supports32 then - def "" : BasicNVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + def "" : BasicNVPTXInst<(outs B32:$result), (ins B32:$src), "cvta." # Str # ".u32", []>, Requires; - def _64 : BasicNVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + def _64 : BasicNVPTXInst<(outs B64:$result), (ins B64:$src), "cvta." # Str # ".u64", []>, Requires; } multiclass G_TO_NG Preds = []> { if Supports32 then - def "" : BasicNVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + def "" : BasicNVPTXInst<(outs B32:$result), (ins B32:$src), "cvta.to." # Str # ".u32", []>, Requires; - def _64 : BasicNVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + def _64 : BasicNVPTXInst<(outs B64:$result), (ins B64:$src), "cvta.to." # Str # ".u64", []>, Requires; } @@ -2253,64 +2182,64 @@ defm cvta_to_shared_cluster : G_TO_NG<"shared::cluster", false, [hasClusters]>; // nvvm.move intrinsicc -def nvvm_move_i16 : BasicNVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), +def nvvm_move_i16 : BasicNVPTXInst<(outs B16:$r), (ins B16:$s), "mov.b16", [(set i16:$r, (int_nvvm_move_i16 i16:$s))]>; -def nvvm_move_i32 : BasicNVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), +def nvvm_move_i32 : BasicNVPTXInst<(outs B32:$r), (ins B32:$s), "mov.b32", [(set i32:$r, (int_nvvm_move_i32 i32:$s))]>; -def nvvm_move_i64 : BasicNVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), +def nvvm_move_i64 : BasicNVPTXInst<(outs B64:$r), (ins B64:$s), "mov.b64", [(set i64:$r, (int_nvvm_move_i64 i64:$s))]>; -def nvvm_move_float : BasicNVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), +def nvvm_move_float : BasicNVPTXInst<(outs B32:$r), (ins B32:$s), "mov.f32", [(set f32:$r, (int_nvvm_move_float f32:$s))]>; -def nvvm_move_double : BasicNVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s), +def nvvm_move_double : BasicNVPTXInst<(outs B64:$r), (ins B64:$s), "mov.f64", [(set f64:$r, (int_nvvm_move_double f64:$s))]>; -def nvvm_move_ptr32 : BasicNVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), +def nvvm_move_ptr32 : BasicNVPTXInst<(outs B32:$r), (ins B32:$s), "mov.u32", [(set i32:$r, (int_nvvm_move_ptr i32:$s))]>; -def nvvm_move_ptr64 : BasicNVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), +def nvvm_move_ptr64 : BasicNVPTXInst<(outs B64:$r), (ins B64:$s), "mov.u64", [(set i64:$r, (int_nvvm_move_ptr i64:$s))]>; // @TODO: Are these actually needed, or will we always just see symbols // copied to registers first? -/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins ADDR_base:$s), +/*def nvvm_move_sym32 : NVPTXInst<(outs B32:$r), (ins ADDR_base:$s), "mov.u32 \t$r, $s;", - [(set Int32Regs:$r, + [(set B32:$r, (int_nvvm_move_ptr texternalsym:$s))]>; -def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins ADDR_base:$s), +def nvvm_move_sym64 : NVPTXInst<(outs B64:$r), (ins ADDR_base:$s), "mov.u64 \t$r, $s;", - [(set Int64Regs:$r, + [(set B64:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ def texsurf_handles - : BasicNVPTXInst<(outs Int64Regs:$result), (ins ADDR_base:$src), "mov.u64">; + : BasicNVPTXInst<(outs B64:$result), (ins ADDR_base:$src), "mov.u64">; //----------------------------------- // Compiler Error Warn // - Just ignore them in codegen //----------------------------------- -def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), +def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins B32:$a), "// llvm.nvvm.compiler.warn()", [(int_nvvm_compiler_warn i32:$a)]>; -def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), +def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins B64:$a), "// llvm.nvvm.compiler.warn()", [(int_nvvm_compiler_warn i64:$a)]>; -def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), +def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins B32:$a), "// llvm.nvvm.compiler.error()", [(int_nvvm_compiler_error i32:$a)]>; -def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), +def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins B64:$a), "// llvm.nvvm.compiler.error()", [(int_nvvm_compiler_error i64:$a)]>; @@ -2318,11 +2247,11 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), // isspacep multiclass ISSPACEP Preds = []> { - def _32: BasicNVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), + def _32: BasicNVPTXInst<(outs B1:$d), (ins B32:$a), "isspacep." # suffix, [(set i1:$d, (Intr i32:$a))]>, Requires; - def _64: BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + def _64: BasicNVPTXInst<(outs B1:$d), (ins B64:$a), "isspacep." # suffix, [(set i1:$d, (Intr i64:$a))]>, Requires; @@ -2337,7 +2266,7 @@ defm isspace_shared_cluster : ISSPACEP<"shared::cluster", [hasPTX<78>, hasSM<90>]>; // Special register reads -def MOV_SPECIAL : BasicNVPTXInst<(outs Int32Regs:$d), +def MOV_SPECIAL : BasicNVPTXInst<(outs B32:$d), (ins SpecialRegs:$r), "mov.b32", []>; @@ -2385,757 +2314,514 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; let IsTex = true, IsTexModeUnified = false in { // Texture fetch instructions using handles -class TEX_1D_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x)), +class TEX_1D_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", pattern>; -multiclass TEX_1D { - def _RR : TEX_1D_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x))]>; - def _RI : TEX_1D_base; - def _IR : TEX_1D_base; - def _II : TEX_1D_base; -} - -defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_1d_v4f32_s32>; -defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_1d_v4f32_f32>; -defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_1d_v4s32_s32>; -defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_v4s32_f32>; -defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_1d_v4u32_s32>; -defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_v4u32_f32>; - -class TEX_1D_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$lod)), +multiclass TEX_1D { + def _RR : TEX_1D_base; + def _RI : TEX_1D_base; + def _IR : TEX_1D_base; + def _II : TEX_1D_base; +} + +defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", int_nvvm_tex_1d_v4f32_s32>; +defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", int_nvvm_tex_1d_v4f32_f32>; +defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", int_nvvm_tex_1d_v4s32_s32>; +defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", int_nvvm_tex_1d_v4s32_f32>; +defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", int_nvvm_tex_1d_v4u32_s32>; +defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", int_nvvm_tex_1d_v4u32_f32>; + +class TEX_1D_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", pattern>; -multiclass TEX_1D_LEVEL { - def _RR : TEX_1D_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$lod))]>; - def _RI : TEX_1D_LEVEL_base; - def _IR : TEX_1D_LEVEL_base; - def _II : TEX_1D_LEVEL_base; +multiclass TEX_1D_LEVEL { + def _RR : TEX_1D_LEVEL_base; + def _RI : TEX_1D_LEVEL_base; + def _IR : TEX_1D_LEVEL_base; + def _II : TEX_1D_LEVEL_base; } defm TEX_1D_F32_F32_LEVEL : - TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_1d_level_v4f32_f32>; + TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", int_nvvm_tex_1d_level_v4f32_f32>; defm TEX_1D_S32_F32_LEVEL : - TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_level_v4s32_f32>; + TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", int_nvvm_tex_1d_level_v4s32_f32>; defm TEX_1D_U32_F32_LEVEL : - TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_level_v4u32_f32>; - -class TEX_1D_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), + TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", int_nvvm_tex_1d_level_v4u32_f32>; + +class TEX_1D_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$gradx, B32:$grady)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," " \\{$gradx\\}, \\{$grady\\};", pattern>; -multiclass TEX_1D_GRAD { - def _RR : TEX_1D_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$gradx, intype:$grady))]>; - def _RI : TEX_1D_GRAD_base; - def _IR : TEX_1D_GRAD_base; - def _II : TEX_1D_GRAD_base; +multiclass TEX_1D_GRAD { + def _RR : TEX_1D_GRAD_base; + def _RI : TEX_1D_GRAD_base; + def _IR : TEX_1D_GRAD_base; + def _II : TEX_1D_GRAD_base; } defm TEX_1D_F32_F32_GRAD - : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_1d_grad_v4f32_f32>; + : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", int_nvvm_tex_1d_grad_v4f32_f32>; defm TEX_1D_S32_F32_GRAD - : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_grad_v4s32_f32>; + : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", int_nvvm_tex_1d_grad_v4s32_f32>; defm TEX_1D_U32_F32_GRAD - : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_grad_v4u32_f32>; - -class TEX_1D_ARRAY_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x)), + : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", int_nvvm_tex_1d_grad_v4u32_f32>; + +class TEX_1D_ARRAY_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", pattern>; -multiclass TEX_1D_ARRAY { - def _RR : TEX_1D_ARRAY_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x))]>; - def _RI : TEX_1D_ARRAY_base; - def _IR : TEX_1D_ARRAY_base; - def _II : TEX_1D_ARRAY_base; +multiclass TEX_1D_ARRAY { + def _RR : TEX_1D_ARRAY_base; + def _RI : TEX_1D_ARRAY_base; + def _IR : TEX_1D_ARRAY_base; + def _II : TEX_1D_ARRAY_base; } defm TEX_1D_ARRAY_F32_F32 - : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_1d_array_v4f32_f32>; + : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", int_nvvm_tex_1d_array_v4f32_f32>; defm TEX_1D_ARRAY_F32_S32 - : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_1d_array_v4f32_s32>; + : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", int_nvvm_tex_1d_array_v4f32_s32>; defm TEX_1D_ARRAY_S32_S32 - : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_1d_array_v4s32_s32>; + : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", int_nvvm_tex_1d_array_v4s32_s32>; defm TEX_1D_ARRAY_S32_F32 - : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_array_v4s32_f32>; + : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", int_nvvm_tex_1d_array_v4s32_f32>; defm TEX_1D_ARRAY_U32_S32 - : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_1d_array_v4u32_s32>; + : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", int_nvvm_tex_1d_array_v4u32_s32>; defm TEX_1D_ARRAY_U32_F32 - : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_array_v4u32_f32>; - -class TEX_1D_ARRAY_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), + : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", int_nvvm_tex_1d_array_v4u32_f32>; + +class TEX_1D_ARRAY_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x\\}], $lod;", pattern>; -multiclass TEX_1D_ARRAY_LEVEL { - def _RR : TEX_1D_ARRAY_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$lod))]>; - def _RI : TEX_1D_ARRAY_LEVEL_base; - def _IR : TEX_1D_ARRAY_LEVEL_base; - def _II : TEX_1D_ARRAY_LEVEL_base; +multiclass TEX_1D_ARRAY_LEVEL { + def _RR : TEX_1D_ARRAY_LEVEL_base; + def _RI : TEX_1D_ARRAY_LEVEL_base; + def _IR : TEX_1D_ARRAY_LEVEL_base; + def _II : TEX_1D_ARRAY_LEVEL_base; } defm TEX_1D_ARRAY_F32_F32_LEVEL - : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_1d_array_level_v4f32_f32>; + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", int_nvvm_tex_1d_array_level_v4f32_f32>; defm TEX_1D_ARRAY_S32_F32_LEVEL - : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_array_level_v4s32_f32>; + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", int_nvvm_tex_1d_array_level_v4s32_f32>; defm TEX_1D_ARRAY_U32_F32_LEVEL - : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_array_level_v4u32_f32>; - -class TEX_1D_ARRAY_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x, - intype:$gradx, intype:$grady)), + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", int_nvvm_tex_1d_array_level_v4u32_f32>; + +class TEX_1D_ARRAY_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x, B32:$gradx, B32:$grady)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}]," " \\{$gradx\\}, \\{$grady\\};", pattern>; -multiclass TEX_1D_ARRAY_GRAD { - def _RR : TEX_1D_ARRAY_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x, - intype:$gradx, intype:$grady))]>; - def _RI : TEX_1D_ARRAY_GRAD_base; - def _IR : TEX_1D_ARRAY_GRAD_base; - def _II : TEX_1D_ARRAY_GRAD_base; +multiclass TEX_1D_ARRAY_GRAD { + def _RR : TEX_1D_ARRAY_GRAD_base; + def _RI : TEX_1D_ARRAY_GRAD_base; + def _IR : TEX_1D_ARRAY_GRAD_base; + def _II : TEX_1D_ARRAY_GRAD_base; } defm TEX_1D_ARRAY_F32_F32_GRAD - : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_1d_array_grad_v4f32_f32>; + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", int_nvvm_tex_1d_array_grad_v4f32_f32>; defm TEX_1D_ARRAY_S32_F32_GRAD - : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_array_grad_v4s32_f32>; + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", int_nvvm_tex_1d_array_grad_v4s32_f32>; defm TEX_1D_ARRAY_U32_F32_GRAD - : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_1d_array_grad_v4u32_f32>; - -class TEX_2D_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y)), + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", int_nvvm_tex_1d_array_grad_v4u32_f32>; + +class TEX_2D_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];", pattern>; -multiclass TEX_2D { - def _RR : TEX_2D_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y))]>; - def _RI : TEX_2D_base; - def _IR : TEX_2D_base; - def _II : TEX_2D_base; -} - -defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_2d_v4f32_f32>; -defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_2d_v4f32_s32>; -defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_2d_v4s32_s32>; -defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_v4s32_f32>; -defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_2d_v4u32_s32>; -defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_v4u32_f32>; - -class TEX_2D_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)), +multiclass TEX_2D { + def _RR : TEX_2D_base; + def _RI : TEX_2D_base; + def _IR : TEX_2D_base; + def _II : TEX_2D_base; +} + +defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", int_nvvm_tex_2d_v4f32_f32>; +defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", int_nvvm_tex_2d_v4f32_s32>; +defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", int_nvvm_tex_2d_v4s32_s32>; +defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", int_nvvm_tex_2d_v4s32_f32>; +defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", int_nvvm_tex_2d_v4u32_s32>; +defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", int_nvvm_tex_2d_v4u32_f32>; + +class TEX_2D_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y\\}], $lod;", pattern>; -multiclass TEX_2D_LEVEL { - def _RR : TEX_2D_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$lod))]>; - def _RI : TEX_2D_LEVEL_base; - def _IR : TEX_2D_LEVEL_base; - def _II : TEX_2D_LEVEL_base; +multiclass TEX_2D_LEVEL { + def _RR : TEX_2D_LEVEL_base; + def _RI : TEX_2D_LEVEL_base; + def _IR : TEX_2D_LEVEL_base; + def _II : TEX_2D_LEVEL_base; } defm TEX_2D_F32_F32_LEVEL : - TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_2d_level_v4f32_f32>; + TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", int_nvvm_tex_2d_level_v4f32_f32>; defm TEX_2D_S32_F32_LEVEL : - TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_level_v4s32_f32>; + TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", int_nvvm_tex_2d_level_v4s32_f32>; defm TEX_2D_U32_F32_LEVEL : - TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_level_v4u32_f32>; - -class TEX_2D_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1)), + TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", int_nvvm_tex_2d_level_v4u32_f32>; + +class TEX_2D_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y, + B32:$gradx0, B32:$gradx1, + B32:$grady0, B32:$grady1)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", pattern>; -multiclass TEX_2D_GRAD { - def _RR : TEX_2D_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1))]>; - def _RI : TEX_2D_GRAD_base; - def _IR : TEX_2D_GRAD_base; - def _II : TEX_2D_GRAD_base; +multiclass TEX_2D_GRAD { + def _RR : TEX_2D_GRAD_base; + def _RI : TEX_2D_GRAD_base; + def _IR : TEX_2D_GRAD_base; + def _II : TEX_2D_GRAD_base; } defm TEX_2D_F32_F32_GRAD : - TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_2d_grad_v4f32_f32>; + TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", int_nvvm_tex_2d_grad_v4f32_f32>; defm TEX_2D_S32_F32_GRAD : - TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_grad_v4s32_f32>; + TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", int_nvvm_tex_2d_grad_v4s32_f32>; defm TEX_2D_U32_F32_GRAD : - TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_grad_v4u32_f32>; - -class TEX_2D_ARRAY_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)), + TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", int_nvvm_tex_2d_grad_v4u32_f32>; + +class TEX_2D_ARRAY_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x, B32:$y)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $y\\}];", pattern>; -multiclass TEX_2D_ARRAY { - def _RR : TEX_2D_ARRAY_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y))]>; - def _RI : TEX_2D_ARRAY_base; - def _IR : TEX_2D_ARRAY_base; - def _II : TEX_2D_ARRAY_base; +multiclass TEX_2D_ARRAY { + def _RR : TEX_2D_ARRAY_base; + def _RI : TEX_2D_ARRAY_base; + def _IR : TEX_2D_ARRAY_base; + def _II : TEX_2D_ARRAY_base; } defm TEX_2D_ARRAY_F32_F32 - : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_2d_array_v4f32_f32>; + : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", int_nvvm_tex_2d_array_v4f32_f32>; defm TEX_2D_ARRAY_F32_S32 - : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_2d_array_v4f32_s32>; + : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", int_nvvm_tex_2d_array_v4f32_s32>; defm TEX_2D_ARRAY_S32_S32 - : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_2d_array_v4s32_s32>; + : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", int_nvvm_tex_2d_array_v4s32_s32>; defm TEX_2D_ARRAY_S32_F32 - : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_array_v4s32_f32>; + : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", int_nvvm_tex_2d_array_v4s32_f32>; defm TEX_2D_ARRAY_U32_S32 - : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_2d_array_v4u32_s32>; + : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", int_nvvm_tex_2d_array_v4u32_s32>; defm TEX_2D_ARRAY_U32_F32 - : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_array_v4u32_f32>; - -class TEX_2D_ARRAY_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, - intype:$lod)), + : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", int_nvvm_tex_2d_array_v4u32_f32>; + +class TEX_2D_ARRAY_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x, B32:$y, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;", pattern>; -multiclass TEX_2D_ARRAY_LEVEL { - def _RR : TEX_2D_ARRAY_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$lod))]>; - def _RI : TEX_2D_ARRAY_LEVEL_base; - def _IR : TEX_2D_ARRAY_LEVEL_base; - def _II : TEX_2D_ARRAY_LEVEL_base; +multiclass TEX_2D_ARRAY_LEVEL { + def _RR : TEX_2D_ARRAY_LEVEL_base; + def _RI : TEX_2D_ARRAY_LEVEL_base; + def _IR : TEX_2D_ARRAY_LEVEL_base; + def _II : TEX_2D_ARRAY_LEVEL_base; } defm TEX_2D_ARRAY_F32_F32_LEVEL - : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_2d_array_level_v4f32_f32>; + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", int_nvvm_tex_2d_array_level_v4f32_f32>; defm TEX_2D_ARRAY_S32_F32_LEVEL - : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_array_level_v4s32_f32>; + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", int_nvvm_tex_2d_array_level_v4s32_f32>; defm TEX_2D_ARRAY_U32_F32_LEVEL - : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_array_level_v4u32_f32>; - -class TEX_2D_ARRAY_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1)), + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", int_nvvm_tex_2d_array_level_v4u32_f32>; + +class TEX_2D_ARRAY_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x, B32:$y, + B32:$gradx0, B32:$gradx1, + B32:$grady0, B32:$grady1)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", pattern>; -multiclass TEX_2D_ARRAY_GRAD { - def _RR : TEX_2D_ARRAY_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1))]>; - def _RI : TEX_2D_ARRAY_GRAD_base; - def _IR : TEX_2D_ARRAY_GRAD_base; - def _II : TEX_2D_ARRAY_GRAD_base; +multiclass TEX_2D_ARRAY_GRAD { + def _RR : TEX_2D_ARRAY_GRAD_base; + def _RI : TEX_2D_ARRAY_GRAD_base; + def _IR : TEX_2D_ARRAY_GRAD_base; + def _II : TEX_2D_ARRAY_GRAD_base; } defm TEX_2D_ARRAY_F32_F32_GRAD - : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_2d_array_grad_v4f32_f32>; + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", int_nvvm_tex_2d_array_grad_v4f32_f32>; defm TEX_2D_ARRAY_S32_F32_GRAD - : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_array_grad_v4s32_f32>; + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", int_nvvm_tex_2d_array_grad_v4s32_f32>; defm TEX_2D_ARRAY_U32_F32_GRAD - : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_2d_array_grad_v4u32_f32>; - -class TEX_3D_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", int_nvvm_tex_2d_array_grad_v4u32_f32>; + +class TEX_3D_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y, B32:$z)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}];", pattern>; -multiclass TEX_3D { - def _RR : TEX_3D_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>; - def _RI : TEX_3D_base; - def _IR : TEX_3D_base; - def _II : TEX_3D_base; -} - -defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_3d_v4f32_f32>; -defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_3d_v4f32_s32>; -defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_3d_v4s32_s32>; -defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_3d_v4s32_f32>; -defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_3d_v4u32_s32>; -defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_3d_v4u32_f32>; - -class TEX_3D_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y, intype:$z, - intype:$lod)), +multiclass TEX_3D { + def _RR : TEX_3D_base; + def _RI : TEX_3D_base; + def _IR : TEX_3D_base; + def _II : TEX_3D_base; +} + +defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", int_nvvm_tex_3d_v4f32_f32>; +defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", int_nvvm_tex_3d_v4f32_s32>; +defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", int_nvvm_tex_3d_v4s32_s32>; +defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", int_nvvm_tex_3d_v4s32_f32>; +defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", int_nvvm_tex_3d_v4u32_s32>; +defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", int_nvvm_tex_3d_v4u32_f32>; + +class TEX_3D_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y, B32:$z, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", pattern>; -multiclass TEX_3D_LEVEL { - def _RR : TEX_3D_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z, - intype:$lod))]>; - def _RI : TEX_3D_LEVEL_base; - def _IR : TEX_3D_LEVEL_base; - def _II : TEX_3D_LEVEL_base; +multiclass TEX_3D_LEVEL { + def _RR : TEX_3D_LEVEL_base; + def _RI : TEX_3D_LEVEL_base; + def _IR : TEX_3D_LEVEL_base; + def _II : TEX_3D_LEVEL_base; } defm TEX_3D_F32_F32_LEVEL - : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_3d_level_v4f32_f32>; + : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", int_nvvm_tex_3d_level_v4f32_f32>; defm TEX_3D_S32_F32_LEVEL - : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_3d_level_v4s32_f32>; + : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", int_nvvm_tex_3d_level_v4s32_f32>; defm TEX_3D_U32_F32_LEVEL - : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_3d_level_v4u32_f32>; - -class TEX_3D_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y, intype:$z, - intype :$gradx0, intype:$gradx1, - intype:$gradx2, intype:$grady0, - intype:$grady1, intype:$grady2)), + : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", int_nvvm_tex_3d_level_v4u32_f32>; + +class TEX_3D_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y, B32:$z, + B32:$gradx0, B32:$gradx1, + B32:$gradx2, B32:$grady0, + B32:$grady1, B32:$grady2)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", pattern>; -multiclass TEX_3D_GRAD { - def _RR : TEX_3D_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z, - intype:$gradx0, intype:$gradx1, intype:$gradx2, - intype:$grady0, intype:$grady1, intype:$grady2))]>; - def _RI : TEX_3D_GRAD_base; - def _IR : TEX_3D_GRAD_base; - def _II : TEX_3D_GRAD_base; +multiclass TEX_3D_GRAD { + def _RR : TEX_3D_GRAD_base; + def _RI : TEX_3D_GRAD_base; + def _IR : TEX_3D_GRAD_base; + def _II : TEX_3D_GRAD_base; } defm TEX_3D_F32_F32_GRAD - : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_3d_grad_v4f32_f32>; + : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", int_nvvm_tex_3d_grad_v4f32_f32>; defm TEX_3D_S32_F32_GRAD - : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_3d_grad_v4s32_f32>; + : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", int_nvvm_tex_3d_grad_v4s32_f32>; defm TEX_3D_U32_F32_GRAD - : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_3d_grad_v4u32_f32>; - -class TEX_CUBE_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), + : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", int_nvvm_tex_3d_grad_v4u32_f32>; + +class TEX_CUBE_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y, B32:$z)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}];", pattern>; -multiclass TEX_CUBE { - def _RR : TEX_CUBE_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>; - def _RI : TEX_CUBE_base; - def _IR : TEX_CUBE_base; - def _II : TEX_CUBE_base; +multiclass TEX_CUBE { + def _RR : TEX_CUBE_base; + def _RI : TEX_CUBE_base; + def _IR : TEX_CUBE_base; + def _II : TEX_CUBE_base; } defm TEX_CUBE_F32_F32 - : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_cube_v4f32_f32>; + : TEX_CUBE<"tex.cube.v4.f32.f32", int_nvvm_tex_cube_v4f32_f32>; defm TEX_CUBE_S32_F32 - : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_cube_v4s32_f32>; + : TEX_CUBE<"tex.cube.v4.s32.f32", int_nvvm_tex_cube_v4s32_f32>; defm TEX_CUBE_U32_F32 - : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_cube_v4u32_f32>; - -class TEX_CUBE_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins intype:$x, intype:$y, intype:$z, - intype:$lod)), + : TEX_CUBE<"tex.cube.v4.u32.f32", int_nvvm_tex_cube_v4u32_f32>; + +class TEX_CUBE_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$x, B32:$y, B32:$z, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", pattern>; -multiclass TEX_CUBE_LEVEL { - def _RR : TEX_CUBE_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z, - intype:$lod))]>; - def _RI : TEX_CUBE_LEVEL_base; - def _IR : TEX_CUBE_LEVEL_base; - def _II : TEX_CUBE_LEVEL_base; +multiclass TEX_CUBE_LEVEL { + def _RR : TEX_CUBE_LEVEL_base; + def _RI : TEX_CUBE_LEVEL_base; + def _IR : TEX_CUBE_LEVEL_base; + def _II : TEX_CUBE_LEVEL_base; } defm TEX_CUBE_F32_F32_LEVEL - : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_cube_level_v4f32_f32>; + : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", int_nvvm_tex_cube_level_v4f32_f32>; defm TEX_CUBE_S32_F32_LEVEL - : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_cube_level_v4s32_f32>; + : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", int_nvvm_tex_cube_level_v4s32_f32>; defm TEX_CUBE_U32_F32_LEVEL - : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_cube_level_v4u32_f32>; - -class TEX_CUBE_ARRAY_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, - intype:$z)), + : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", int_nvvm_tex_cube_level_v4u32_f32>; + +class TEX_CUBE_ARRAY_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x, B32:$y, B32:$z)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $z\\}];", pattern>; -multiclass TEX_CUBE_ARRAY { - def _RR : TEX_CUBE_ARRAY_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z))]>; - def _RI : TEX_CUBE_ARRAY_base; - def _IR : TEX_CUBE_ARRAY_base; - def _II : TEX_CUBE_ARRAY_base; +multiclass TEX_CUBE_ARRAY { + def _RR : TEX_CUBE_ARRAY_base; + def _RI : TEX_CUBE_ARRAY_base; + def _IR : TEX_CUBE_ARRAY_base; + def _II : TEX_CUBE_ARRAY_base; } defm TEX_CUBE_ARRAY_F32_F32 - : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_cube_array_v4f32_f32>; + : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", int_nvvm_tex_cube_array_v4f32_f32>; defm TEX_CUBE_ARRAY_S32_F32 - : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_cube_array_v4s32_f32>; + : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", int_nvvm_tex_cube_array_v4s32_f32>; defm TEX_CUBE_ARRAY_U32_F32 - : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_cube_array_v4u32_f32>; - -class TEX_CUBE_ARRAY_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, - intype:$z, intype:$lod)), + : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", int_nvvm_tex_cube_array_v4u32_f32>; + +class TEX_CUBE_ARRAY_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(texsamp, (ins B32:$l, B32:$x, B32:$y, B32:$z, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;", pattern>; -multiclass TEX_CUBE_ARRAY_LEVEL { - def _RR : TEX_CUBE_ARRAY_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z, - intype:$lod))]>; - def _RI : TEX_CUBE_ARRAY_LEVEL_base; - def _IR : TEX_CUBE_ARRAY_LEVEL_base; - def _II : TEX_CUBE_ARRAY_LEVEL_base; +multiclass TEX_CUBE_ARRAY_LEVEL { + def _RR : TEX_CUBE_ARRAY_LEVEL_base; + def _RI : TEX_CUBE_ARRAY_LEVEL_base; + def _IR : TEX_CUBE_ARRAY_LEVEL_base; + def _II : TEX_CUBE_ARRAY_LEVEL_base; } defm TEX_CUBE_ARRAY_F32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_cube_array_level_v4f32_f32>; defm TEX_CUBE_ARRAY_S32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_cube_array_level_v4s32_f32>; defm TEX_CUBE_ARRAY_U32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_cube_array_level_v4u32_f32>; -class TLD4_2D_base pattern = []> - : NVPTXInst<(outs outtype:$v0, outtype:$v1, - outtype:$v2, outtype:$v3), - !con(texsamp, (ins intype:$x, intype:$y)), +class TLD4_2D_base pattern = []> + : NVPTXInst<(outs B32:$v0, B32:$v1, B32:$v2, B32:$v3), + !con(texsamp, (ins B32:$x, B32:$y)), inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];", pattern>; -multiclass TLD4_2D { - def _RR : TLD4_2D_base< - inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s), - [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3, - (intr i64:$t, i64:$s, intype:$x, intype:$y))]>; - def _RI : TLD4_2D_base; - def _IR : TLD4_2D_base; - def _II : TLD4_2D_base; +multiclass TLD4_2D { + def _RR : TLD4_2D_base; + def _RI : TLD4_2D_base; + def _IR : TLD4_2D_base; + def _II : TLD4_2D_base; } defm TLD4_R_2D_F32_F32 - : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_r_2d_v4f32_f32>; + : TLD4_2D<"tld4.r.2d.v4.f32.f32", int_nvvm_tld4_r_2d_v4f32_f32>; defm TLD4_G_2D_F32_F32 - : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_g_2d_v4f32_f32>; + : TLD4_2D<"tld4.g.2d.v4.f32.f32", int_nvvm_tld4_g_2d_v4f32_f32>; defm TLD4_B_2D_F32_F32 - : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_b_2d_v4f32_f32>; + : TLD4_2D<"tld4.b.2d.v4.f32.f32", int_nvvm_tld4_b_2d_v4f32_f32>; defm TLD4_A_2D_F32_F32 - : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_a_2d_v4f32_f32>; + : TLD4_2D<"tld4.a.2d.v4.f32.f32", int_nvvm_tld4_a_2d_v4f32_f32>; defm TLD4_R_2D_S32_F32 - : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_r_2d_v4s32_f32>; + : TLD4_2D<"tld4.r.2d.v4.s32.f32", int_nvvm_tld4_r_2d_v4s32_f32>; defm TLD4_G_2D_S32_F32 - : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_g_2d_v4s32_f32>; + : TLD4_2D<"tld4.g.2d.v4.s32.f32", int_nvvm_tld4_g_2d_v4s32_f32>; defm TLD4_B_2D_S32_F32 - : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_b_2d_v4s32_f32>; + : TLD4_2D<"tld4.b.2d.v4.s32.f32", int_nvvm_tld4_b_2d_v4s32_f32>; defm TLD4_A_2D_S32_F32 - : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_a_2d_v4s32_f32>; + : TLD4_2D<"tld4.a.2d.v4.s32.f32", int_nvvm_tld4_a_2d_v4s32_f32>; defm TLD4_R_2D_U32_F32 - : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_r_2d_v4u32_f32>; + : TLD4_2D<"tld4.r.2d.v4.u32.f32", int_nvvm_tld4_r_2d_v4u32_f32>; defm TLD4_G_2D_U32_F32 - : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_g_2d_v4u32_f32>; + : TLD4_2D<"tld4.g.2d.v4.u32.f32", int_nvvm_tld4_g_2d_v4u32_f32>; defm TLD4_B_2D_U32_F32 - : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_b_2d_v4u32_f32>; + : TLD4_2D<"tld4.b.2d.v4.u32.f32", int_nvvm_tld4_b_2d_v4u32_f32>; defm TLD4_A_2D_U32_F32 - : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_a_2d_v4u32_f32>; + : TLD4_2D<"tld4.a.2d.v4.u32.f32", int_nvvm_tld4_a_2d_v4u32_f32>; } @@ -3144,754 +2830,542 @@ defm TLD4_A_2D_U32_F32 let IsTex = true, IsTexModeUnified = true in { // Texture fetch instructions using handles -class TEX_UNIFIED_1D_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x)), +class TEX_UNIFIED_1D_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", pattern>; -multiclass TEX_UNIFIED_1D { - def _R : TEX_UNIFIED_1D_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x))]>; - def _I : TEX_UNIFIED_1D_base; +multiclass TEX_UNIFIED_1D { + def _R : TEX_UNIFIED_1D_base; + def _I : TEX_UNIFIED_1D_base; } defm TEX_UNIFIED_1D_F32_S32 - : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_unified_1d_v4f32_s32>; + : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", int_nvvm_tex_unified_1d_v4f32_s32>; defm TEX_UNIFIED_1D_F32_F32 - : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_1d_v4f32_f32>; + : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", int_nvvm_tex_unified_1d_v4f32_f32>; defm TEX_UNIFIED_1D_S32_S32 - : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_1d_v4s32_s32>; + : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", int_nvvm_tex_unified_1d_v4s32_s32>; defm TEX_UNIFIED_1D_S32_F32 - : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_v4s32_f32>; + : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", int_nvvm_tex_unified_1d_v4s32_f32>; defm TEX_UNIFIED_1D_U32_S32 - : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_1d_v4u32_s32>; + : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", int_nvvm_tex_unified_1d_v4u32_s32>; defm TEX_UNIFIED_1D_U32_F32 - : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_v4u32_f32>; - -class TEX_UNIFIED_1D_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$lod)), + : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", int_nvvm_tex_unified_1d_v4u32_f32>; + +class TEX_UNIFIED_1D_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;", pattern>; -multiclass TEX_UNIFIED_1D_LEVEL { - def _R : TEX_UNIFIED_1D_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$lod))]>; - def _I : TEX_UNIFIED_1D_LEVEL_base; +multiclass TEX_UNIFIED_1D_LEVEL { + def _R : TEX_UNIFIED_1D_LEVEL_base; + def _I : TEX_UNIFIED_1D_LEVEL_base; } defm TEX_UNIFIED_1D_F32_F32_LEVEL - : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_1d_level_v4f32_f32>; + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", int_nvvm_tex_unified_1d_level_v4f32_f32>; defm TEX_UNIFIED_1D_S32_F32_LEVEL - : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_level_v4s32_f32>; + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", int_nvvm_tex_unified_1d_level_v4s32_f32>; defm TEX_UNIFIED_1D_U32_F32_LEVEL - : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_level_v4u32_f32>; - -class TEX_UNIFIED_1D_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)), + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", int_nvvm_tex_unified_1d_level_v4u32_f32>; + +class TEX_UNIFIED_1D_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$gradx, B32:$grady)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", pattern>; -multiclass TEX_UNIFIED_1D_GRAD { - def _R : TEX_UNIFIED_1D_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$gradx, intype:$grady))]>; - def _I : TEX_UNIFIED_1D_GRAD_base; +multiclass TEX_UNIFIED_1D_GRAD { + def _R : TEX_UNIFIED_1D_GRAD_base; + def _I : TEX_UNIFIED_1D_GRAD_base; } defm TEX_UNIFIED_1D_F32_F32_GRAD - : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_1d_grad_v4f32_f32>; + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", int_nvvm_tex_unified_1d_grad_v4f32_f32>; defm TEX_UNIFIED_1D_S32_F32_GRAD - : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_grad_v4s32_f32>; + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", int_nvvm_tex_unified_1d_grad_v4s32_f32>; defm TEX_UNIFIED_1D_U32_F32_GRAD - : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_grad_v4u32_f32>; - -class TEX_UNIFIED_1D_ARRAY_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x)), + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", int_nvvm_tex_unified_1d_grad_v4u32_f32>; + +class TEX_UNIFIED_1D_ARRAY_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];", pattern>; -multiclass TEX_UNIFIED_1D_ARRAY { - def _R : TEX_UNIFIED_1D_ARRAY_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x))]>; - def _I : TEX_UNIFIED_1D_ARRAY_base; +multiclass TEX_UNIFIED_1D_ARRAY { + def _R : TEX_UNIFIED_1D_ARRAY_base; + def _I : TEX_UNIFIED_1D_ARRAY_base; } defm TEX_UNIFIED_1D_ARRAY_F32_S32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_unified_1d_array_v4f32_s32>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", int_nvvm_tex_unified_1d_array_v4f32_s32>; defm TEX_UNIFIED_1D_ARRAY_F32_F32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_1d_array_v4f32_f32>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", int_nvvm_tex_unified_1d_array_v4f32_f32>; defm TEX_UNIFIED_1D_ARRAY_S32_S32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_1d_array_v4s32_s32>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", int_nvvm_tex_unified_1d_array_v4s32_s32>; defm TEX_UNIFIED_1D_ARRAY_S32_F32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_array_v4s32_f32>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", int_nvvm_tex_unified_1d_array_v4s32_f32>; defm TEX_UNIFIED_1D_ARRAY_U32_S32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_1d_array_v4u32_s32>; + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", int_nvvm_tex_unified_1d_array_v4u32_s32>; defm TEX_UNIFIED_1D_ARRAY_U32_F32 - : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_1d_array_v4u32_f32>; - -class TEX_UNIFIED_1D_ARRAY_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)), + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", int_nvvm_tex_unified_1d_array_v4u32_f32>; + +class TEX_UNIFIED_1D_ARRAY_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;", pattern>; -multiclass TEX_UNIFIED_1D_ARRAY_LEVEL { - def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$lod))]>; - def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base; +multiclass TEX_UNIFIED_1D_ARRAY_LEVEL { + def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base; + def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base; } defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_unified_1d_array_level_v4f32_f32>; defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_1d_array_level_v4s32_f32>; defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_1d_array_level_v4u32_f32>; -class TEX_UNIFIED_1D_ARRAY_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, - intype:$gradx, intype:$grady)), +class TEX_UNIFIED_1D_ARRAY_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$gradx, B32:$grady)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", pattern>; -multiclass TEX_UNIFIED_1D_ARRAY_GRAD { - def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$gradx, intype:$grady))]>; - def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base; +multiclass TEX_UNIFIED_1D_ARRAY_GRAD { + def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base; + def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base; } defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_unified_1d_array_grad_v4f32_f32>; defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_1d_array_grad_v4s32_f32>; defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_1d_array_grad_v4u32_f32>; -class TEX_UNIFIED_2D_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y)), +class TEX_UNIFIED_2D_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];", pattern>; -multiclass TEX_UNIFIED_2D { - def _R : TEX_UNIFIED_2D_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y))]>; - def _I : TEX_UNIFIED_2D_base; +multiclass TEX_UNIFIED_2D { + def _R : TEX_UNIFIED_2D_base; + def _I : TEX_UNIFIED_2D_base; } defm TEX_UNIFIED_2D_F32_S32 - : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_unified_2d_v4f32_s32>; + : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", int_nvvm_tex_unified_2d_v4f32_s32>; defm TEX_UNIFIED_2D_F32_F32 - : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_2d_v4f32_f32>; + : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", int_nvvm_tex_unified_2d_v4f32_f32>; defm TEX_UNIFIED_2D_S32_S32 - : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_2d_v4s32_s32>; + : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", int_nvvm_tex_unified_2d_v4s32_s32>; defm TEX_UNIFIED_2D_S32_F32 - : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_v4s32_f32>; + : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", int_nvvm_tex_unified_2d_v4s32_f32>; defm TEX_UNIFIED_2D_U32_S32 - : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_2d_v4u32_s32>; + : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", int_nvvm_tex_unified_2d_v4u32_s32>; defm TEX_UNIFIED_2D_U32_F32 - : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_v4u32_f32>; - -class TEX_UNIFIED_2D_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, intype:$lod)), + : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", int_nvvm_tex_unified_2d_v4u32_f32>; + +class TEX_UNIFIED_2D_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;", pattern>; -multiclass TEX_UNIFIED_2D_LEVEL { - def _R : TEX_UNIFIED_2D_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, intype:$lod))]>; - def _I : TEX_UNIFIED_2D_LEVEL_base; +multiclass TEX_UNIFIED_2D_LEVEL { + def _R : TEX_UNIFIED_2D_LEVEL_base; + def _I : TEX_UNIFIED_2D_LEVEL_base; } defm TEX_UNIFIED_2D_F32_F32_LEVEL - : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_2d_level_v4f32_f32>; + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", int_nvvm_tex_unified_2d_level_v4f32_f32>; defm TEX_UNIFIED_2D_S32_F32_LEVEL - : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_level_v4s32_f32>; + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", int_nvvm_tex_unified_2d_level_v4s32_f32>; defm TEX_UNIFIED_2D_U32_F32_LEVEL - : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_level_v4u32_f32>; - -class TEX_UNIFIED_2D_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1)), + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", int_nvvm_tex_unified_2d_level_v4u32_f32>; + +class TEX_UNIFIED_2D_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, + B32:$gradx0, B32:$gradx1, + B32:$grady0, B32:$grady1)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", pattern>; -multiclass TEX_UNIFIED_2D_GRAD { - def _R : TEX_UNIFIED_2D_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1))]>; - def _I : TEX_UNIFIED_2D_GRAD_base; +multiclass TEX_UNIFIED_2D_GRAD { + def _R : TEX_UNIFIED_2D_GRAD_base; + def _I : TEX_UNIFIED_2D_GRAD_base; } defm TEX_UNIFIED_2D_F32_F32_GRAD - : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_2d_grad_v4f32_f32>; + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", int_nvvm_tex_unified_2d_grad_v4f32_f32>; defm TEX_UNIFIED_2D_S32_F32_GRAD - : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_grad_v4s32_f32>; + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", int_nvvm_tex_unified_2d_grad_v4s32_f32>; defm TEX_UNIFIED_2D_U32_F32_GRAD - : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_grad_v4u32_f32>; - -class TEX_UNIFIED_2D_ARRAY_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)), + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", int_nvvm_tex_unified_2d_grad_v4u32_f32>; + +class TEX_UNIFIED_2D_ARRAY_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$y)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];", pattern>; -multiclass TEX_UNIFIED_2D_ARRAY { - def _R : TEX_UNIFIED_2D_ARRAY_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$y))]>; - def _I : TEX_UNIFIED_2D_ARRAY_base; +multiclass TEX_UNIFIED_2D_ARRAY { + def _R : TEX_UNIFIED_2D_ARRAY_base; + def _I : TEX_UNIFIED_2D_ARRAY_base; } defm TEX_UNIFIED_2D_ARRAY_F32_S32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_unified_2d_array_v4f32_s32>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", int_nvvm_tex_unified_2d_array_v4f32_s32>; defm TEX_UNIFIED_2D_ARRAY_F32_F32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_2d_array_v4f32_f32>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", int_nvvm_tex_unified_2d_array_v4f32_f32>; defm TEX_UNIFIED_2D_ARRAY_S32_S32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_2d_array_v4s32_s32>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", int_nvvm_tex_unified_2d_array_v4s32_s32>; defm TEX_UNIFIED_2D_ARRAY_S32_F32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_array_v4s32_f32>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", int_nvvm_tex_unified_2d_array_v4s32_f32>; defm TEX_UNIFIED_2D_ARRAY_U32_S32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_2d_array_v4u32_s32>; + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", int_nvvm_tex_unified_2d_array_v4u32_s32>; defm TEX_UNIFIED_2D_ARRAY_U32_F32 - : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_2d_array_v4u32_f32>; - -class TEX_UNIFIED_2D_ARRAY_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, - intype:$lod)), + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", int_nvvm_tex_unified_2d_array_v4u32_f32>; + +class TEX_UNIFIED_2D_ARRAY_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$l, $x, $y, $y\\}], $lod;", pattern>; -multiclass TEX_UNIFIED_2D_ARRAY_LEVEL { - def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$lod))]>; - def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base; +multiclass TEX_UNIFIED_2D_ARRAY_LEVEL { + def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base; + def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base; } defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_unified_2d_array_level_v4f32_f32>; defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_2d_array_level_v4s32_f32>; defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_2d_array_level_v4u32_f32>; -class TEX_UNIFIED_2D_ARRAY_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1)), +class TEX_UNIFIED_2D_ARRAY_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$y, + B32:$gradx0, B32:$gradx1, + B32:$grady0, B32:$grady1)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}]," " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", pattern>; -multiclass TEX_UNIFIED_2D_ARRAY_GRAD { - def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$y, - intype:$gradx0, intype:$gradx1, - intype:$grady0, intype:$grady1))]>; - def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base; +multiclass TEX_UNIFIED_2D_ARRAY_GRAD { + def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base; + def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base; } defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_unified_2d_array_grad_v4f32_f32>; defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_2d_array_grad_v4s32_f32>; defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_2d_array_grad_v4u32_f32>; -class TEX_UNIFIED_3D_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, intype:$z)), +class TEX_UNIFIED_3D_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, B32:$z)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", pattern>; -multiclass TEX_UNIFIED_3D { - def _R : TEX_UNIFIED_3D_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, intype:$z))]>; - def _I : TEX_UNIFIED_3D_base; +multiclass TEX_UNIFIED_3D { + def _R : TEX_UNIFIED_3D_base; + def _I : TEX_UNIFIED_3D_base; } defm TEX_UNIFIED_3D_F32_S32 - : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs, - int_nvvm_tex_unified_3d_v4f32_s32>; + : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", int_nvvm_tex_unified_3d_v4f32_s32>; defm TEX_UNIFIED_3D_F32_F32 - : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_3d_v4f32_f32>; + : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", int_nvvm_tex_unified_3d_v4f32_f32>; defm TEX_UNIFIED_3D_S32_S32 - : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_3d_v4s32_s32>; + : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", int_nvvm_tex_unified_3d_v4s32_s32>; defm TEX_UNIFIED_3D_S32_F32 - : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_3d_v4s32_f32>; + : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", int_nvvm_tex_unified_3d_v4s32_f32>; defm TEX_UNIFIED_3D_U32_S32 - : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs, - int_nvvm_tex_unified_3d_v4u32_s32>; + : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", int_nvvm_tex_unified_3d_v4u32_s32>; defm TEX_UNIFIED_3D_U32_F32 - : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_3d_v4u32_f32>; - -class TEX_UNIFIED_3D_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), + : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", int_nvvm_tex_unified_3d_v4u32_f32>; + +class TEX_UNIFIED_3D_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, B32:$z, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$x, $y, $z, $z\\}], $lod;", pattern>; -multiclass TEX_UNIFIED_3D_LEVEL { - def _R : TEX_UNIFIED_3D_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>; - def _I : TEX_UNIFIED_3D_LEVEL_base; +multiclass TEX_UNIFIED_3D_LEVEL { + def _R : TEX_UNIFIED_3D_LEVEL_base; + def _I : TEX_UNIFIED_3D_LEVEL_base; } defm TEX_UNIFIED_3D_F32_F32_LEVEL - : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_3d_level_v4f32_f32>; + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", int_nvvm_tex_unified_3d_level_v4f32_f32>; defm TEX_UNIFIED_3D_S32_F32_LEVEL - : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_3d_level_v4s32_f32>; + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", int_nvvm_tex_unified_3d_level_v4s32_f32>; defm TEX_UNIFIED_3D_U32_F32_LEVEL - : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_3d_level_v4u32_f32>; - -class TEX_UNIFIED_3D_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, intype:$z, - intype:$gradx0, intype:$gradx1, - intype:$gradx2, intype:$grady0, - intype:$grady1, intype:$grady2)), + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", int_nvvm_tex_unified_3d_level_v4u32_f32>; + +class TEX_UNIFIED_3D_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, B32:$z, + B32:$gradx0, B32:$gradx1, + B32:$gradx2, B32:$grady0, + B32:$grady1, B32:$grady2)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", pattern>; -multiclass TEX_UNIFIED_3D_GRAD { - def _R : TEX_UNIFIED_3D_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, intype:$z, - intype:$gradx0, intype:$gradx1, intype:$gradx2, - intype:$grady0, intype:$grady1, intype:$grady2))]>; - def _I : TEX_UNIFIED_3D_GRAD_base; +multiclass TEX_UNIFIED_3D_GRAD { + def _R : TEX_UNIFIED_3D_GRAD_base; + def _I : TEX_UNIFIED_3D_GRAD_base; } defm TEX_UNIFIED_3D_F32_F32_GRAD - : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_3d_grad_v4f32_f32>; + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", int_nvvm_tex_unified_3d_grad_v4f32_f32>; defm TEX_UNIFIED_3D_S32_F32_GRAD - : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_3d_grad_v4s32_f32>; + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", int_nvvm_tex_unified_3d_grad_v4s32_f32>; defm TEX_UNIFIED_3D_U32_F32_GRAD - : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_3d_grad_v4u32_f32>; + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", int_nvvm_tex_unified_3d_grad_v4u32_f32>; -class TEX_UNIFIED_CUBE_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, intype:$z)), +class TEX_UNIFIED_CUBE_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, B32:$z)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", pattern>; -multiclass TEX_UNIFIED_CUBE { - def _R : TEX_UNIFIED_CUBE_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, intype:$z))]>; - def _I : TEX_UNIFIED_CUBE_base; +multiclass TEX_UNIFIED_CUBE { + def _R : TEX_UNIFIED_CUBE_base; + def _I : TEX_UNIFIED_CUBE_base; } defm TEX_UNIFIED_CUBE_F32_F32 - : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_cube_v4f32_f32>; + : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", int_nvvm_tex_unified_cube_v4f32_f32>; defm TEX_UNIFIED_CUBE_S32_F32 - : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_cube_v4s32_f32>; + : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", int_nvvm_tex_unified_cube_v4s32_f32>; defm TEX_UNIFIED_CUBE_U32_F32 - : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_cube_v4u32_f32>; - -class TEX_UNIFIED_CUBE_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), + : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", int_nvvm_tex_unified_cube_v4u32_f32>; + +class TEX_UNIFIED_CUBE_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, B32:$z, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$x, $y, $z, $z\\}], $lod;", pattern>; -multiclass TEX_UNIFIED_CUBE_LEVEL { - def _R : TEX_UNIFIED_CUBE_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>; - def _I : TEX_UNIFIED_CUBE_LEVEL_base; +multiclass TEX_UNIFIED_CUBE_LEVEL { + def _R : TEX_UNIFIED_CUBE_LEVEL_base; + def _I : TEX_UNIFIED_CUBE_LEVEL_base; } defm TEX_UNIFIED_CUBE_F32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_unified_cube_level_v4f32_f32>; defm TEX_UNIFIED_CUBE_S32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_cube_level_v4s32_f32>; defm TEX_UNIFIED_CUBE_U32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_cube_level_v4u32_f32>; -class TEX_UNIFIED_CUBE_ARRAY_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), +class TEX_UNIFIED_CUBE_ARRAY_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$z)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", pattern>; -multiclass TEX_UNIFIED_CUBE_ARRAY { - def _R : TEX_UNIFIED_CUBE_ARRAY_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z))]>; - def _I : TEX_UNIFIED_CUBE_ARRAY_base; +multiclass TEX_UNIFIED_CUBE_ARRAY { + def _R : TEX_UNIFIED_CUBE_ARRAY_base; + def _I : TEX_UNIFIED_CUBE_ARRAY_base; } defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 - : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_cube_array_v4f32_f32>; + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", int_nvvm_tex_unified_cube_array_v4f32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 - : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_cube_array_v4s32_f32>; + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", int_nvvm_tex_unified_cube_array_v4s32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 - : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_cube_array_v4u32_f32>; - -class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, - intype:$lod)), + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", int_nvvm_tex_unified_cube_array_v4u32_f32>; + +class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$z, B32:$lod)), inst # " \t\\{$r, $g, $b, $a\\}," " [$t, \\{$l, $x, $y, $z\\}], $lod;", pattern>; -multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL { - def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z, intype:$lod))]>; - def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base; +multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL { + def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base; + def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base; } defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_unified_cube_array_level_v4f32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_cube_array_level_v4s32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_cube_array_level_v4u32_f32>; -class TEX_UNIFIED_CUBE_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins intype:$x, intype:$y, intype:$z, - intype:$gradx0, intype:$gradx1, - intype:$gradx2, intype:$grady0, - intype:$grady1, intype:$grady2)), +class TEX_UNIFIED_CUBE_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$x, B32:$y, B32:$z, + B32:$gradx0, B32:$gradx1, + B32:$gradx2, B32:$grady0, + B32:$grady1, B32:$grady2)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", pattern>; -multiclass TEX_UNIFIED_CUBE_GRAD { - def _R : TEX_UNIFIED_CUBE_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, intype:$x, intype:$y, intype:$z, - intype:$gradx0, intype:$gradx1, intype:$gradx2, - intype:$grady0, intype:$grady1, intype:$grady2))]>; - def _I : TEX_UNIFIED_CUBE_GRAD_base; +multiclass TEX_UNIFIED_CUBE_GRAD { + def _R : TEX_UNIFIED_CUBE_GRAD_base; + def _I : TEX_UNIFIED_CUBE_GRAD_base; } defm TEX_UNIFIED_CUBE_F32_F32_GRAD - : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tex_unified_cube_grad_v4f32_f32>; + : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", int_nvvm_tex_unified_cube_grad_v4f32_f32>; defm TEX_UNIFIED_CUBE_S32_F32_GRAD - : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_cube_grad_v4s32_f32>; + : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", int_nvvm_tex_unified_cube_grad_v4s32_f32>; defm TEX_UNIFIED_CUBE_U32_F32_GRAD - : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tex_unified_cube_grad_v4u32_f32>; - -class TEX_UNIFIED_CUBE_ARRAY_GRAD_base pattern = []> - : NVPTXInst<(outs outtype:$r, outtype:$g, - outtype:$b, outtype:$a), - !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, - intype:$gradx0, intype:$gradx1, - intype:$gradx2, intype:$grady0, - intype:$grady1, intype:$grady2)), + : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", int_nvvm_tex_unified_cube_grad_v4u32_f32>; + +class TEX_UNIFIED_CUBE_ARRAY_GRAD_base pattern = []> + : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a), + !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$z, + B32:$gradx0, B32:$gradx1, + B32:$gradx2, B32:$grady0, + B32:$grady1, B32:$grady2)), inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}]," " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," " \\{$grady0, $grady1, $grady2, $grady2\\};", pattern>; -multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD { - def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, - (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z, - intype:$gradx0, intype:$gradx1, - intype:$gradx2, intype:$grady0, - intype:$grady1, intype:$grady2))]>; - def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base; +multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD { + def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base; + def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base; } defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32", - Float32Regs, Float32Regs, int_nvvm_tex_unified_cube_array_grad_v4f32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_cube_array_grad_v4s32_f32>; defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32", - Int32Regs, Float32Regs, int_nvvm_tex_unified_cube_array_grad_v4u32_f32>; -class TLD4_UNIFIED_2D_base pattern = []> - : NVPTXInst<(outs outtype:$v0, outtype:$v1, - outtype:$v2, outtype:$v3), - !con(tex, (ins intype:$x, intype:$y)), +class TLD4_UNIFIED_2D_base pattern = []> + : NVPTXInst<(outs B32:$v0, B32:$v1, B32:$v2, B32:$v3), + !con(tex, (ins B32:$x, B32:$y)), inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];", pattern>; -multiclass TLD4_UNIFIED_2D { - def _R : TLD4_UNIFIED_2D_base< - inst, outtype, intype, (ins Int64Regs:$t), - [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3, - (intr i64:$t, intype:$x, intype:$y))]>; - def _I : TLD4_UNIFIED_2D_base; +multiclass TLD4_UNIFIED_2D { + def _R : TLD4_UNIFIED_2D_base; + def _I : TLD4_UNIFIED_2D_base; } defm TLD4_UNIFIED_R_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_unified_r_2d_v4f32_f32>; + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", int_nvvm_tld4_unified_r_2d_v4f32_f32>; defm TLD4_UNIFIED_G_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_unified_g_2d_v4f32_f32>; + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", int_nvvm_tld4_unified_g_2d_v4f32_f32>; defm TLD4_UNIFIED_B_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_unified_b_2d_v4f32_f32>; + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", int_nvvm_tld4_unified_b_2d_v4f32_f32>; defm TLD4_UNIFIED_A_2D_F32_F32 - : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs, - int_nvvm_tld4_unified_a_2d_v4f32_f32>; + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", int_nvvm_tld4_unified_a_2d_v4f32_f32>; defm TLD4_UNIFIED_R_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_r_2d_v4s32_f32>; + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", int_nvvm_tld4_unified_r_2d_v4s32_f32>; defm TLD4_UNIFIED_G_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_g_2d_v4s32_f32>; + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", int_nvvm_tld4_unified_g_2d_v4s32_f32>; defm TLD4_UNIFIED_B_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_b_2d_v4s32_f32>; + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", int_nvvm_tld4_unified_b_2d_v4s32_f32>; defm TLD4_UNIFIED_A_2D_S32_F32 - : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_a_2d_v4s32_f32>; + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", int_nvvm_tld4_unified_a_2d_v4s32_f32>; defm TLD4_UNIFIED_R_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_r_2d_v4u32_f32>; + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", int_nvvm_tld4_unified_r_2d_v4u32_f32>; defm TLD4_UNIFIED_G_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_g_2d_v4u32_f32>; + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", int_nvvm_tld4_unified_g_2d_v4u32_f32>; defm TLD4_UNIFIED_B_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_b_2d_v4u32_f32>; + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", int_nvvm_tld4_unified_b_2d_v4u32_f32>; defm TLD4_UNIFIED_A_2D_U32_F32 - : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs, - int_nvvm_tld4_unified_a_2d_v4u32_f32>; + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", int_nvvm_tld4_unified_a_2d_v4u32_f32>; } - //=== Surface load instructions let IsSuld = true in { @@ -3899,162 +3373,150 @@ let IsSuld = true in { class SULD_1D_base pattern = []> : NVPTXInst<(outs outtype:$r), - !con(surf, (ins Int32Regs:$x)), + !con(surf, (ins B32:$x)), inst # " \\{$r\\}, [$s, \\{$x\\}];", pattern>; multiclass SULD_1D { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_1D_base; def _I : SULD_1D_base; } -defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>; -defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>; -defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>; -defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>; +defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", B16>; +defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", B16>; +defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", B32>; +defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", B64>; -defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>; -defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>; -defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>; -defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>; +defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", B16>; +defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", B16>; +defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", B32>; +defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", B64>; -defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>; -defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>; -defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>; -defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>; +defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", B16>; +defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", B16>; +defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", B32>; +defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", B64>; class SULD_1D_ARRAY_base pattern = []> : NVPTXInst<(outs outtype:$r), - !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + !con(surf, (ins B32:$l, B32:$x)), inst # " \\{$r\\}, [$s, \\{$l, $x\\}];", pattern>; multiclass SULD_1D_ARRAY { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_1D_ARRAY_base; def _I : SULD_1D_ARRAY_base; } -defm SULD_1D_ARRAY_I8_CLAMP - : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>; -defm SULD_1D_ARRAY_I16_CLAMP - : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>; -defm SULD_1D_ARRAY_I32_CLAMP - : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>; -defm SULD_1D_ARRAY_I64_CLAMP - : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>; - -defm SULD_1D_ARRAY_I8_TRAP - : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>; -defm SULD_1D_ARRAY_I16_TRAP - : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>; -defm SULD_1D_ARRAY_I32_TRAP - : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>; -defm SULD_1D_ARRAY_I64_TRAP - : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>; - -defm SULD_1D_ARRAY_I8_ZERO - : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>; -defm SULD_1D_ARRAY_I16_ZERO - : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>; -defm SULD_1D_ARRAY_I32_ZERO - : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>; -defm SULD_1D_ARRAY_I64_ZERO - : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>; +defm SULD_1D_ARRAY_I8_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", B16>; +defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", B16>; +defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", B32>; +defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", B64>; + +defm SULD_1D_ARRAY_I8_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", B16>; +defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", B16>; +defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", B32>; +defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", B64>; + +defm SULD_1D_ARRAY_I8_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", B16>; +defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", B16>; +defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", B32>; +defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", B64>; class SULD_2D_base pattern = []> : NVPTXInst<(outs outtype:$r), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + !con(surf, (ins B32:$x, B32:$y)), inst # " \\{$r\\}, [$s, \\{$x, $y\\}];", pattern>; multiclass SULD_2D { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_2D_base; def _I : SULD_2D_base; } -defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>; -defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>; -defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>; -defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>; +defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", B16>; +defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", B16>; +defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", B32>; +defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", B64>; -defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>; -defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>; -defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>; -defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>; +defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", B16>; +defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", B16>; +defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", B32>; +defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", B64>; -defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>; -defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>; -defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>; -defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>; +defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", B16>; +defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", B16>; +defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", B32>; +defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", B64>; class SULD_2D_ARRAY_base pattern = []> : NVPTXInst<(outs outtype:$r), - !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + !con(surf, (ins B32:$l, B32:$x, B32:$y)), inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", pattern>; multiclass SULD_2D_ARRAY { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_2D_ARRAY_base; def _I : SULD_2D_ARRAY_base; } -defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>; -defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>; -defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>; -defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>; +defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", B16>; +defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", B16>; +defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", B32>; +defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", B64>; -defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>; -defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>; -defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>; -defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>; +defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", B16>; +defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", B16>; +defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", B32>; +defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", B64>; -defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>; -defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>; -defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>; -defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>; +defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", B16>; +defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", B16>; +defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", B32>; +defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", B64>; class SULD_3D_base pattern = []> : NVPTXInst<(outs outtype:$r), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + !con(surf, (ins B32:$x, B32:$y, B32:$z)), inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", pattern>; multiclass SULD_3D { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_3D_base; def _I : SULD_3D_base; } -defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>; -defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>; -defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>; -defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>; +defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", B16>; +defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", B16>; +defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", B32>; +defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", B64>; -defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>; -defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>; -defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>; -defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>; +defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", B16>; +defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", B16>; +defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", B32>; +defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", B64>; -defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>; -defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>; -defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>; -defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>; +defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", B16>; +defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", B16>; +defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", B32>; +defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", B64>; } let IsSuld = 2 in { @@ -4062,188 +3524,152 @@ let IsSuld = 2 in { class SULD_1D_V2_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), - !con(surf, (ins Int32Regs:$x)), + !con(surf, (ins B32:$x)), inst # " \\{$r, $g\\}, [$s, \\{$x\\}];", pattern>; multiclass SULD_1D_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_1D_V2_base; def _I : SULD_1D_V2_base; } -defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>; -defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>; -defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>; -defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>; +defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", B16>; +defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", B16>; +defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", B32>; +defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", B64>; -defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>; -defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>; -defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>; -defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>; +defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", B16>; +defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", B16>; +defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", B32>; +defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", B64>; -defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>; -defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>; -defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>; -defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>; +defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", B16>; +defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", B16>; +defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", B32>; +defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", B64>; class SULD_1D_ARRAY_V2_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), - !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + !con(surf, (ins B32:$l, B32:$x)), inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];", pattern>; multiclass SULD_1D_ARRAY_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_1D_ARRAY_V2_base; def _I : SULD_1D_ARRAY_V2_base; } -defm SULD_1D_ARRAY_V2I8_CLAMP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>; -defm SULD_1D_ARRAY_V2I16_CLAMP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>; -defm SULD_1D_ARRAY_V2I32_CLAMP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>; -defm SULD_1D_ARRAY_V2I64_CLAMP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>; - -defm SULD_1D_ARRAY_V2I8_TRAP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>; -defm SULD_1D_ARRAY_V2I16_TRAP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>; -defm SULD_1D_ARRAY_V2I32_TRAP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>; -defm SULD_1D_ARRAY_V2I64_TRAP - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>; - -defm SULD_1D_ARRAY_V2I8_ZERO - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>; -defm SULD_1D_ARRAY_V2I16_ZERO - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>; -defm SULD_1D_ARRAY_V2I32_ZERO - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>; -defm SULD_1D_ARRAY_V2I64_ZERO - : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>; +defm SULD_1D_ARRAY_V2I8_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", B16>; +defm SULD_1D_ARRAY_V2I16_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", B16>; +defm SULD_1D_ARRAY_V2I32_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", B32>; +defm SULD_1D_ARRAY_V2I64_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", B64>; + +defm SULD_1D_ARRAY_V2I8_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", B16>; +defm SULD_1D_ARRAY_V2I16_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", B16>; +defm SULD_1D_ARRAY_V2I32_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", B32>; +defm SULD_1D_ARRAY_V2I64_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", B64>; + +defm SULD_1D_ARRAY_V2I8_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", B16>; +defm SULD_1D_ARRAY_V2I16_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", B16>; +defm SULD_1D_ARRAY_V2I32_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", B32>; +defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", B64>; class SULD_2D_V2_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + !con(surf, (ins B32:$x, B32:$y)), inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];", pattern>; multiclass SULD_2D_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_2D_V2_base; def _I : SULD_2D_V2_base; } -defm SULD_2D_V2I8_CLAMP - : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>; -defm SULD_2D_V2I16_CLAMP - : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>; -defm SULD_2D_V2I32_CLAMP - : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>; -defm SULD_2D_V2I64_CLAMP - : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>; - -defm SULD_2D_V2I8_TRAP - : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>; -defm SULD_2D_V2I16_TRAP - : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>; -defm SULD_2D_V2I32_TRAP - : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>; -defm SULD_2D_V2I64_TRAP - : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>; - -defm SULD_2D_V2I8_ZERO - : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>; -defm SULD_2D_V2I16_ZERO - : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>; -defm SULD_2D_V2I32_ZERO - : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>; -defm SULD_2D_V2I64_ZERO - : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>; +defm SULD_2D_V2I8_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", B16>; +defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", B16>; +defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", B32>; +defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", B64>; + +defm SULD_2D_V2I8_TRAP : SULD_2D_V2<"suld.b.2d.v2.b8.trap", B16>; +defm SULD_2D_V2I16_TRAP : SULD_2D_V2<"suld.b.2d.v2.b16.trap", B16>; +defm SULD_2D_V2I32_TRAP : SULD_2D_V2<"suld.b.2d.v2.b32.trap", B32>; +defm SULD_2D_V2I64_TRAP : SULD_2D_V2<"suld.b.2d.v2.b64.trap", B64>; + +defm SULD_2D_V2I8_ZERO : SULD_2D_V2<"suld.b.2d.v2.b8.zero", B16>; +defm SULD_2D_V2I16_ZERO : SULD_2D_V2<"suld.b.2d.v2.b16.zero", B16>; +defm SULD_2D_V2I32_ZERO : SULD_2D_V2<"suld.b.2d.v2.b32.zero", B32>; +defm SULD_2D_V2I64_ZERO : SULD_2D_V2<"suld.b.2d.v2.b64.zero", B64>; class SULD_2D_ARRAY_V2_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), - !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + !con(surf, (ins B32:$l, B32:$x, B32:$y)), inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", pattern>; multiclass SULD_2D_ARRAY_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_2D_ARRAY_V2_base; def _I : SULD_2D_ARRAY_V2_base; } -defm SULD_2D_ARRAY_V2I8_CLAMP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>; -defm SULD_2D_ARRAY_V2I16_CLAMP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>; -defm SULD_2D_ARRAY_V2I32_CLAMP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>; -defm SULD_2D_ARRAY_V2I64_CLAMP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>; - -defm SULD_2D_ARRAY_V2I8_TRAP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>; -defm SULD_2D_ARRAY_V2I16_TRAP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>; -defm SULD_2D_ARRAY_V2I32_TRAP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>; -defm SULD_2D_ARRAY_V2I64_TRAP - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>; - -defm SULD_2D_ARRAY_V2I8_ZERO - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>; -defm SULD_2D_ARRAY_V2I16_ZERO - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>; -defm SULD_2D_ARRAY_V2I32_ZERO - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>; -defm SULD_2D_ARRAY_V2I64_ZERO - : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>; +defm SULD_2D_ARRAY_V2I8_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", B16>; +defm SULD_2D_ARRAY_V2I16_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", B16>; +defm SULD_2D_ARRAY_V2I32_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", B32>; +defm SULD_2D_ARRAY_V2I64_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", B64>; + +defm SULD_2D_ARRAY_V2I8_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", B16>; +defm SULD_2D_ARRAY_V2I16_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", B16>; +defm SULD_2D_ARRAY_V2I32_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", B32>; +defm SULD_2D_ARRAY_V2I64_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", B64>; + +defm SULD_2D_ARRAY_V2I8_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", B16>; +defm SULD_2D_ARRAY_V2I16_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", B16>; +defm SULD_2D_ARRAY_V2I32_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", B32>; +defm SULD_2D_ARRAY_V2I64_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", B64>; class SULD_3D_V2_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + !con(surf, (ins B32:$x, B32:$y, B32:$z)), inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", pattern>; multiclass SULD_3D_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_3D_V2_base; def _I : SULD_3D_V2_base; } -defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>; -defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>; -defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>; -defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>; +defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", B16>; +defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", B16>; +defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", B32>; +defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", B64>; -defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>; -defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>; -defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>; -defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>; +defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", B16>; +defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", B16>; +defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", B32>; +defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", B64>; -defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>; -defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>; -defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>; -defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>; +defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", B16>; +defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", B16>; +defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", B32>; +defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", B64>; } @@ -4252,157 +3678,139 @@ let IsSuld = 3 in { class SULD_1D_V4_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), - !con(surf, (ins Int32Regs:$x)), + !con(surf, (ins B32:$x)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", pattern>; multiclass SULD_1D_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_1D_V4_base; def _I : SULD_1D_V4_base; } -defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>; -defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>; -defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>; +defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", B16>; +defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", B16>; +defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", B32>; -defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>; -defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>; -defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>; +defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", B16>; +defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", B16>; +defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", B32>; -defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>; -defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>; -defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>; +defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", B16>; +defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", B16>; +defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", B32>; class SULD_1D_ARRAY_V4_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), - !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + !con(surf, (ins B32:$l, B32:$x)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", pattern>; multiclass SULD_1D_ARRAY_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_1D_ARRAY_V4_base; def _I : SULD_1D_ARRAY_V4_base; } -defm SULD_1D_ARRAY_V4I8_CLAMP - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>; -defm SULD_1D_ARRAY_V4I16_CLAMP - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>; -defm SULD_1D_ARRAY_V4I32_CLAMP - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>; +defm SULD_1D_ARRAY_V4I8_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", B16>; +defm SULD_1D_ARRAY_V4I16_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", B16>; +defm SULD_1D_ARRAY_V4I32_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", B32>; -defm SULD_1D_ARRAY_V4I8_TRAP - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>; -defm SULD_1D_ARRAY_V4I16_TRAP - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>; -defm SULD_1D_ARRAY_V4I32_TRAP - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>; +defm SULD_1D_ARRAY_V4I8_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", B16>; +defm SULD_1D_ARRAY_V4I16_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", B16>; +defm SULD_1D_ARRAY_V4I32_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", B32>; -defm SULD_1D_ARRAY_V4I8_ZERO - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>; -defm SULD_1D_ARRAY_V4I16_ZERO - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>; -defm SULD_1D_ARRAY_V4I32_ZERO - : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>; +defm SULD_1D_ARRAY_V4I8_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", B16>; +defm SULD_1D_ARRAY_V4I16_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", B16>; +defm SULD_1D_ARRAY_V4I32_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", B32>; class SULD_2D_V4_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + !con(surf, (ins B32:$x, B32:$y)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", pattern>; multiclass SULD_2D_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_2D_V4_base; def _I : SULD_2D_V4_base; } -defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>; -defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>; -defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>; +defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", B16>; +defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", B16>; +defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", B32>; -defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>; -defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>; -defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>; +defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", B16>; +defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", B16>; +defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", B32>; -defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>; -defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>; -defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>; +defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", B16>; +defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", B16>; +defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", B32>; class SULD_2D_ARRAY_V4_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), - !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + !con(surf, (ins B32:$l, B32:$x, B32:$y)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", pattern>; multiclass SULD_2D_ARRAY_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_2D_ARRAY_V4_base; def _I : SULD_2D_ARRAY_V4_base; } -defm SULD_2D_ARRAY_V4I8_CLAMP - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>; -defm SULD_2D_ARRAY_V4I16_CLAMP - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>; -defm SULD_2D_ARRAY_V4I32_CLAMP - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>; +defm SULD_2D_ARRAY_V4I8_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", B16>; +defm SULD_2D_ARRAY_V4I16_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", B16>; +defm SULD_2D_ARRAY_V4I32_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", B32>; -defm SULD_2D_ARRAY_V4I8_TRAP - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>; -defm SULD_2D_ARRAY_V4I16_TRAP - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>; -defm SULD_2D_ARRAY_V4I32_TRAP - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>; +defm SULD_2D_ARRAY_V4I8_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", B16>; +defm SULD_2D_ARRAY_V4I16_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", B16>; +defm SULD_2D_ARRAY_V4I32_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", B32>; -defm SULD_2D_ARRAY_V4I8_ZERO - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>; -defm SULD_2D_ARRAY_V4I16_ZERO - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>; -defm SULD_2D_ARRAY_V4I32_ZERO - : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>; +defm SULD_2D_ARRAY_V4I8_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", B16>; +defm SULD_2D_ARRAY_V4I16_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", B16>; +defm SULD_2D_ARRAY_V4I32_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", B32>; class SULD_3D_V4_base pattern = []> : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + !con(surf, (ins B32:$x, B32:$y, B32:$z)), inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", pattern>; multiclass SULD_3D_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SULD_3D_V4_base; def _I : SULD_3D_V4_base; } -defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>; -defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>; -defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>; +defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", B16>; +defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", B16>; +defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", B32>; -defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>; -defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>; -defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>; +defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", B16>; +defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", B16>; +defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", B32>; -defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>; -defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>; -defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>; +defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", B16>; +defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", B16>; +defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", B32>; } @@ -4414,11 +3822,11 @@ let IsSurfTexQuery = true in { foreach query = ["channel_order", "channel_data_type", "width", "height", "depth", "array_size", "num_samples", "num_mipmap_levels"] in { def TXQ_ # !toupper(query) # _R - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + : NVPTXInst<(outs B32:$d), (ins B64:$a), "txq." # query # ".b32 \t$d, [$a];", [(set i32:$d, (!cast("int_nvvm_txq_" # query) i64:$a))]>; def TXQ_ # !toupper(query) # _I - : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + : NVPTXInst<(outs B32:$d), (ins i64imm:$a), "txq." # query # ".b32 \t$d, [$a];", []>; } @@ -4431,11 +3839,11 @@ let IsSurfTexQuery = true in { let IsSurfTexQuery = true in { foreach query = ["channel_order", "channel_data_type", "width", "height", "depth", "array_size"] in { def SUQ_ # !toupper(query) # _R - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + : NVPTXInst<(outs B32:$d), (ins B64:$a), "suq." # query # ".b32 \t$d, [$a];", [(set i32:$d, (!cast("int_nvvm_suq_" # query) i64:$a))]>; def SUQ_ # !toupper(query) # _I - : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + : NVPTXInst<(outs B32:$d), (ins i64imm:$a), "suq." # query # ".b32 \t$d, [$a];", []>; } @@ -4445,15 +3853,15 @@ let IsSurfTexQuery = true in { // TODO: These intrinsics are not yet finalized, pending PTX ISA design work def ISTYPEP_SAMPLER - : BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + : BasicNVPTXInst<(outs B1:$d), (ins B64:$a), "istypep.samplerref", [(set i1:$d, (int_nvvm_istypep_sampler i64:$a))]>; def ISTYPEP_SURFACE - : BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + : BasicNVPTXInst<(outs B1:$d), (ins B64:$a), "istypep.surfref", [(set i1:$d, (int_nvvm_istypep_surface i64:$a))]>; def ISTYPEP_TEXTURE - : BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + : BasicNVPTXInst<(outs B1:$d), (ins B64:$a), "istypep.texref", [(set i1:$d, (int_nvvm_istypep_texture i64:$a))]>; @@ -4463,561 +3871,489 @@ let IsSust = true in { class SUST_1D_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, intype:$r)), + !con(surf, (ins B32:$x, intype:$r)), inst # " \t[$s, \\{$x\\}], \\{$r\\};", pat>; multiclass SUST_1D { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_1D_base; + def _R : SUST_1D_base; def _I : SUST_1D_base; } -defm SUST_B_1D_I8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>; -defm SUST_B_1D_I16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>; -defm SUST_B_1D_I32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>; -defm SUST_B_1D_I64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>; +defm SUST_B_1D_I8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", B16>; +defm SUST_B_1D_I16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", B16>; +defm SUST_B_1D_I32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", B32>; +defm SUST_B_1D_I64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", B64>; -defm SUST_B_1D_I8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; -defm SUST_B_1D_I16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>; -defm SUST_B_1D_I32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>; -defm SUST_B_1D_I64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>; +defm SUST_B_1D_I8_TRAP : SUST_1D<"sust.b.1d.b8.trap", B16>; +defm SUST_B_1D_I16_TRAP : SUST_1D<"sust.b.1d.b16.trap", B16>; +defm SUST_B_1D_I32_TRAP : SUST_1D<"sust.b.1d.b32.trap", B32>; +defm SUST_B_1D_I64_TRAP : SUST_1D<"sust.b.1d.b64.trap", B64>; -defm SUST_B_1D_I8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; -defm SUST_B_1D_I16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; -defm SUST_B_1D_I32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; -defm SUST_B_1D_I64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; +defm SUST_B_1D_I8_ZERO : SUST_1D<"sust.b.1d.b8.zero", B16>; +defm SUST_B_1D_I16_ZERO : SUST_1D<"sust.b.1d.b16.zero", B16>; +defm SUST_B_1D_I32_ZERO : SUST_1D<"sust.b.1d.b32.zero", B32>; +defm SUST_B_1D_I64_ZERO : SUST_1D<"sust.b.1d.b64.zero", B64>; -defm SUST_P_1D_I8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; -defm SUST_P_1D_I16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; -defm SUST_P_1D_I32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; +defm SUST_P_1D_I8_TRAP : SUST_1D<"sust.p.1d.b8.trap", B16>; +defm SUST_P_1D_I16_TRAP : SUST_1D<"sust.p.1d.b16.trap", B16>; +defm SUST_P_1D_I32_TRAP : SUST_1D<"sust.p.1d.b32.trap", B32>; class SUST_1D_V2_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), + !con(surf, (ins B32:$x, intype:$r, intype:$g)), inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", pat>; multiclass SUST_1D_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_1D_V2_base; + def _R : SUST_1D_V2_base; def _I : SUST_1D_V2_base; } // int_nvvm_sust_b_1d_v2i8_clamp -defm SUST_B_1D_V2I8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; -defm SUST_B_1D_V2I16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; -defm SUST_B_1D_V2I32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; -defm SUST_B_1D_V2I64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; +defm SUST_B_1D_V2I8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", B16>; +defm SUST_B_1D_V2I16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", B16>; +defm SUST_B_1D_V2I32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", B32>; +defm SUST_B_1D_V2I64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", B64>; -defm SUST_B_1D_V2I8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; -defm SUST_B_1D_V2I16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; -defm SUST_B_1D_V2I32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>; -defm SUST_B_1D_V2I64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; +defm SUST_B_1D_V2I8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", B16>; +defm SUST_B_1D_V2I16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", B16>; +defm SUST_B_1D_V2I32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", B32>; +defm SUST_B_1D_V2I64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", B64>; -defm SUST_B_1D_V2I8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; -defm SUST_B_1D_V2I16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; -defm SUST_B_1D_V2I32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; -defm SUST_B_1D_V2I64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; +defm SUST_B_1D_V2I8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", B16>; +defm SUST_B_1D_V2I16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", B16>; +defm SUST_B_1D_V2I32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", B32>; +defm SUST_B_1D_V2I64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", B64>; -defm SUST_P_1D_V2I8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; -defm SUST_P_1D_V2I16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; -defm SUST_P_1D_V2I32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; +defm SUST_P_1D_V2I8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", B16>; +defm SUST_P_1D_V2I16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", B16>; +defm SUST_P_1D_V2I32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", B32>; class SUST_1D_V4_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, + !con(surf, (ins B32:$x, intype:$r, intype:$g, intype:$b, intype:$a)), inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", pat>; multiclass SUST_1D_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_1D_V4_base; def _I : SUST_1D_V4_base; } -defm SUST_B_1D_V4I8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; -defm SUST_B_1D_V4I16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; -defm SUST_B_1D_V4I32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; +defm SUST_B_1D_V4I8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", B16>; +defm SUST_B_1D_V4I16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", B16>; +defm SUST_B_1D_V4I32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", B32>; -defm SUST_B_1D_V4I8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; -defm SUST_B_1D_V4I16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; -defm SUST_B_1D_V4I32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; +defm SUST_B_1D_V4I8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", B16>; +defm SUST_B_1D_V4I16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", B16>; +defm SUST_B_1D_V4I32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", B32>; -defm SUST_B_1D_V4I8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; -defm SUST_B_1D_V4I16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; -defm SUST_B_1D_V4I32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; +defm SUST_B_1D_V4I8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", B16>; +defm SUST_B_1D_V4I16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", B16>; +defm SUST_B_1D_V4I32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", B32>; -defm SUST_P_1D_V4I8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; -defm SUST_P_1D_V4I16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; -defm SUST_P_1D_V4I32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; +defm SUST_P_1D_V4I8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", B16>; +defm SUST_P_1D_V4I16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", B16>; +defm SUST_P_1D_V4I32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", B32>; class SUST_1D_ARRAY_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), + !con(surf, (ins B32:$idx, B32:$x, intype:$r)), inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", pat>; multiclass SUST_1D_ARRAY { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_1D_ARRAY_base; + def _R : SUST_1D_ARRAY_base; def _I : SUST_1D_ARRAY_base; } -defm SUST_B_1D_ARRAY_I8_CLAMP - : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>; -defm SUST_B_1D_ARRAY_I16_CLAMP - : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>; -defm SUST_B_1D_ARRAY_I32_CLAMP - : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>; -defm SUST_B_1D_ARRAY_I64_CLAMP - : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>; - -defm SUST_B_1D_ARRAY_I8_TRAP - : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>; -defm SUST_B_1D_ARRAY_I16_TRAP - : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>; -defm SUST_B_1D_ARRAY_I32_TRAP - : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>; -defm SUST_B_1D_ARRAY_I64_TRAP - : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>; - -defm SUST_B_1D_ARRAY_I8_ZERO - : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>; -defm SUST_B_1D_ARRAY_I16_ZERO - : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>; -defm SUST_B_1D_ARRAY_I32_ZERO - : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>; -defm SUST_B_1D_ARRAY_I64_ZERO - : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>; - -defm SUST_P_1D_ARRAY_I8_TRAP - : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>; -defm SUST_P_1D_ARRAY_I16_TRAP - : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>; -defm SUST_P_1D_ARRAY_I32_TRAP - : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>; +defm SUST_B_1D_ARRAY_I8_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", B16>; +defm SUST_B_1D_ARRAY_I16_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", B16>; +defm SUST_B_1D_ARRAY_I32_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", B32>; +defm SUST_B_1D_ARRAY_I64_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", B64>; + +defm SUST_B_1D_ARRAY_I8_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", B16>; +defm SUST_B_1D_ARRAY_I16_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", B16>; +defm SUST_B_1D_ARRAY_I32_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", B32>; +defm SUST_B_1D_ARRAY_I64_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", B64>; + +defm SUST_B_1D_ARRAY_I8_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", B16>; +defm SUST_B_1D_ARRAY_I16_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", B16>; +defm SUST_B_1D_ARRAY_I32_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", B32>; +defm SUST_B_1D_ARRAY_I64_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", B64>; + +defm SUST_P_1D_ARRAY_I8_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", B16>; +defm SUST_P_1D_ARRAY_I16_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", B16>; +defm SUST_P_1D_ARRAY_I32_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", B32>; class SUST_1D_ARRAY_V2_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, + !con(surf, (ins B32:$idx, B32:$x, intype:$r, intype:$g)), inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", pat>; multiclass SUST_1D_ARRAY_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_1D_ARRAY_V2_base; def _I : SUST_1D_ARRAY_V2_base; } -defm SUST_B_1D_ARRAY_V2I8_CLAMP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>; -defm SUST_B_1D_ARRAY_V2I16_CLAMP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>; -defm SUST_B_1D_ARRAY_V2I32_CLAMP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>; -defm SUST_B_1D_ARRAY_V2I64_CLAMP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>; - -defm SUST_B_1D_ARRAY_V2I8_TRAP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>; -defm SUST_B_1D_ARRAY_V2I16_TRAP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>; -defm SUST_B_1D_ARRAY_V2I32_TRAP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>; -defm SUST_B_1D_ARRAY_V2I64_TRAP - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>; - -defm SUST_B_1D_ARRAY_V2I8_ZERO - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>; -defm SUST_B_1D_ARRAY_V2I16_ZERO - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>; -defm SUST_B_1D_ARRAY_V2I32_ZERO - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>; -defm SUST_B_1D_ARRAY_V2I64_ZERO - : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>; - -defm SUST_P_1D_ARRAY_V2I8_TRAP - : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>; -defm SUST_P_1D_ARRAY_V2I16_TRAP - : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>; -defm SUST_P_1D_ARRAY_V2I32_TRAP - : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>; +defm SUST_B_1D_ARRAY_V2I8_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", B16>; +defm SUST_B_1D_ARRAY_V2I16_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", B16>; +defm SUST_B_1D_ARRAY_V2I32_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", B32>; +defm SUST_B_1D_ARRAY_V2I64_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", B64>; + +defm SUST_B_1D_ARRAY_V2I8_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", B16>; +defm SUST_B_1D_ARRAY_V2I16_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", B16>; +defm SUST_B_1D_ARRAY_V2I32_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", B32>; +defm SUST_B_1D_ARRAY_V2I64_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", B64>; + +defm SUST_B_1D_ARRAY_V2I8_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", B16>; +defm SUST_B_1D_ARRAY_V2I16_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", B16>; +defm SUST_B_1D_ARRAY_V2I32_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", B32>; +defm SUST_B_1D_ARRAY_V2I64_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", B64>; + +defm SUST_P_1D_ARRAY_V2I8_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", B16>; +defm SUST_P_1D_ARRAY_V2I16_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", B16>; +defm SUST_P_1D_ARRAY_V2I32_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", B32>; class SUST_1D_ARRAY_V4_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, + !con(surf, (ins B32:$idx, B32:$x, intype:$r, intype:$g, intype:$b, intype:$a)), inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};", pat>; multiclass SUST_1D_ARRAY_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_1D_ARRAY_V4_base; def _I : SUST_1D_ARRAY_V4_base; } defm SUST_B_1D_ARRAY_V4I8_CLAMP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", B16>; defm SUST_B_1D_ARRAY_V4I16_CLAMP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", B16>; defm SUST_B_1D_ARRAY_V4I32_CLAMP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", B32>; defm SUST_B_1D_ARRAY_V4I8_TRAP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", B16>; defm SUST_B_1D_ARRAY_V4I16_TRAP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", B16>; defm SUST_B_1D_ARRAY_V4I32_TRAP - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", B32>; defm SUST_B_1D_ARRAY_V4I8_ZERO - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", B16>; defm SUST_B_1D_ARRAY_V4I16_ZERO - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", B16>; defm SUST_B_1D_ARRAY_V4I32_ZERO - : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>; + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", B32>; defm SUST_P_1D_ARRAY_V4I8_TRAP - : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", B16>; defm SUST_P_1D_ARRAY_V4I16_TRAP - : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>; + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", B16>; defm SUST_P_1D_ARRAY_V4I32_TRAP - : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>; + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", B32>; class SUST_2D_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)), + !con(surf, (ins B32:$x, B32:$y, intype:$r)), inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};", pat>; multiclass SUST_2D { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_2D_base; + def _R : SUST_2D_base; def _I : SUST_2D_base; } -defm SUST_B_2D_I8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>; -defm SUST_B_2D_I16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>; -defm SUST_B_2D_I32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>; -defm SUST_B_2D_I64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>; +defm SUST_B_2D_I8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", B16>; +defm SUST_B_2D_I16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", B16>; +defm SUST_B_2D_I32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", B32>; +defm SUST_B_2D_I64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", B64>; -defm SUST_B_2D_I8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>; -defm SUST_B_2D_I16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>; -defm SUST_B_2D_I32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>; -defm SUST_B_2D_I64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>; +defm SUST_B_2D_I8_TRAP : SUST_2D<"sust.b.2d.b8.trap", B16>; +defm SUST_B_2D_I16_TRAP : SUST_2D<"sust.b.2d.b16.trap", B16>; +defm SUST_B_2D_I32_TRAP : SUST_2D<"sust.b.2d.b32.trap", B32>; +defm SUST_B_2D_I64_TRAP : SUST_2D<"sust.b.2d.b64.trap", B64>; -defm SUST_B_2D_I8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>; -defm SUST_B_2D_I16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>; -defm SUST_B_2D_I32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>; -defm SUST_B_2D_I64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>; +defm SUST_B_2D_I8_ZERO : SUST_2D<"sust.b.2d.b8.zero", B16>; +defm SUST_B_2D_I16_ZERO : SUST_2D<"sust.b.2d.b16.zero", B16>; +defm SUST_B_2D_I32_ZERO : SUST_2D<"sust.b.2d.b32.zero", B32>; +defm SUST_B_2D_I64_ZERO : SUST_2D<"sust.b.2d.b64.zero", B64>; -defm SUST_P_2D_I8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>; -defm SUST_P_2D_I16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>; -defm SUST_P_2D_I32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>; +defm SUST_P_2D_I8_TRAP : SUST_2D<"sust.p.2d.b8.trap", B16>; +defm SUST_P_2D_I16_TRAP : SUST_2D<"sust.p.2d.b16.trap", B16>; +defm SUST_P_2D_I32_TRAP : SUST_2D<"sust.p.2d.b32.trap", B32>; class SUST_2D_V2_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, + !con(surf, (ins B32:$x, B32:$y, intype:$r, intype:$g)), inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", pat>; multiclass SUST_2D_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_2D_V2_base; def _I : SUST_2D_V2_base; } -defm SUST_B_2D_V2I8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>; -defm SUST_B_2D_V2I16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>; -defm SUST_B_2D_V2I32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>; -defm SUST_B_2D_V2I64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>; +defm SUST_B_2D_V2I8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", B16>; +defm SUST_B_2D_V2I16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", B16>; +defm SUST_B_2D_V2I32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", B32>; +defm SUST_B_2D_V2I64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", B64>; -defm SUST_B_2D_V2I8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>; -defm SUST_B_2D_V2I16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>; -defm SUST_B_2D_V2I32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>; -defm SUST_B_2D_V2I64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>; +defm SUST_B_2D_V2I8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", B16>; +defm SUST_B_2D_V2I16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", B16>; +defm SUST_B_2D_V2I32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", B32>; +defm SUST_B_2D_V2I64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", B64>; -defm SUST_B_2D_V2I8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>; -defm SUST_B_2D_V2I16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>; -defm SUST_B_2D_V2I32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>; -defm SUST_B_2D_V2I64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>; +defm SUST_B_2D_V2I8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", B16>; +defm SUST_B_2D_V2I16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", B16>; +defm SUST_B_2D_V2I32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", B32>; +defm SUST_B_2D_V2I64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", B64>; -defm SUST_P_2D_V2I8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>; -defm SUST_P_2D_V2I16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>; -defm SUST_P_2D_V2I32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>; +defm SUST_P_2D_V2I8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", B16>; +defm SUST_P_2D_V2I16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", B16>; +defm SUST_P_2D_V2I32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", B32>; class SUST_2D_V4_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, + !con(surf, (ins B32:$x, B32:$y, intype:$r, intype:$g, intype:$b, intype:$a)), inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};", pat>; multiclass SUST_2D_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_2D_V4_base; def _I : SUST_2D_V4_base; } -defm SUST_B_2D_V4I8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>; -defm SUST_B_2D_V4I16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>; -defm SUST_B_2D_V4I32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>; +defm SUST_B_2D_V4I8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", B16>; +defm SUST_B_2D_V4I16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", B16>; +defm SUST_B_2D_V4I32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", B32>; -defm SUST_B_2D_V4I8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>; -defm SUST_B_2D_V4I16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>; -defm SUST_B_2D_V4I32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>; +defm SUST_B_2D_V4I8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", B16>; +defm SUST_B_2D_V4I16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", B16>; +defm SUST_B_2D_V4I32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", B32>; -defm SUST_B_2D_V4I8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>; -defm SUST_B_2D_V4I16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>; -defm SUST_B_2D_V4I32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>; +defm SUST_B_2D_V4I8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", B16>; +defm SUST_B_2D_V4I16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", B16>; +defm SUST_B_2D_V4I32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", B32>; -defm SUST_P_2D_V4I8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>; -defm SUST_P_2D_V4I16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>; -defm SUST_P_2D_V4I32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>; +defm SUST_P_2D_V4I8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", B16>; +defm SUST_P_2D_V4I16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", B16>; +defm SUST_P_2D_V4I32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", B32>; class SUST_2D_ARRAY_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + !con(surf, (ins B32:$idx, B32:$x, B32:$y, intype:$r)), inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", pat>; multiclass SUST_2D_ARRAY { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_2D_ARRAY_base; def _I : SUST_2D_ARRAY_base; } -defm SUST_B_2D_ARRAY_I8_CLAMP - : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>; -defm SUST_B_2D_ARRAY_I16_CLAMP - : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>; -defm SUST_B_2D_ARRAY_I32_CLAMP - : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>; -defm SUST_B_2D_ARRAY_I64_CLAMP - : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>; - -defm SUST_B_2D_ARRAY_I8_TRAP - : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>; -defm SUST_B_2D_ARRAY_I16_TRAP - : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>; -defm SUST_B_2D_ARRAY_I32_TRAP - : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>; -defm SUST_B_2D_ARRAY_I64_TRAP - : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>; - -defm SUST_B_2D_ARRAY_I8_ZERO - : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>; -defm SUST_B_2D_ARRAY_I16_ZERO - : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>; -defm SUST_B_2D_ARRAY_I32_ZERO - : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>; -defm SUST_B_2D_ARRAY_I64_ZERO - : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>; - -defm SUST_P_2D_ARRAY_I8_TRAP - : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>; -defm SUST_P_2D_ARRAY_I16_TRAP - : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>; -defm SUST_P_2D_ARRAY_I32_TRAP - : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>; +defm SUST_B_2D_ARRAY_I8_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", B16>; +defm SUST_B_2D_ARRAY_I16_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", B16>; +defm SUST_B_2D_ARRAY_I32_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", B32>; +defm SUST_B_2D_ARRAY_I64_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", B64>; + +defm SUST_B_2D_ARRAY_I8_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", B16>; +defm SUST_B_2D_ARRAY_I16_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", B16>; +defm SUST_B_2D_ARRAY_I32_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", B32>; +defm SUST_B_2D_ARRAY_I64_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", B64>; + +defm SUST_B_2D_ARRAY_I8_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", B16>; +defm SUST_B_2D_ARRAY_I16_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", B16>; +defm SUST_B_2D_ARRAY_I32_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", B32>; +defm SUST_B_2D_ARRAY_I64_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", B64>; + +defm SUST_P_2D_ARRAY_I8_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", B16>; +defm SUST_P_2D_ARRAY_I16_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", B16>; +defm SUST_P_2D_ARRAY_I32_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", B32>; class SUST_2D_ARRAY_V2_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + !con(surf, (ins B32:$idx, B32:$x, B32:$y, intype:$r, intype:$g)), inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};", pat>; multiclass SUST_2D_ARRAY_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_2D_ARRAY_V2_base; def _I : SUST_2D_ARRAY_V2_base; } -defm SUST_B_2D_ARRAY_V2I8_CLAMP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>; -defm SUST_B_2D_ARRAY_V2I16_CLAMP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>; -defm SUST_B_2D_ARRAY_V2I32_CLAMP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>; -defm SUST_B_2D_ARRAY_V2I64_CLAMP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>; - -defm SUST_B_2D_ARRAY_V2I8_TRAP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>; -defm SUST_B_2D_ARRAY_V2I16_TRAP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>; -defm SUST_B_2D_ARRAY_V2I32_TRAP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>; -defm SUST_B_2D_ARRAY_V2I64_TRAP - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>; - -defm SUST_B_2D_ARRAY_V2I8_ZERO - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>; -defm SUST_B_2D_ARRAY_V2I16_ZERO - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>; -defm SUST_B_2D_ARRAY_V2I32_ZERO - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>; -defm SUST_B_2D_ARRAY_V2I64_ZERO - : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>; - -defm SUST_P_2D_ARRAY_V2I8_TRAP - : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>; -defm SUST_P_2D_ARRAY_V2I16_TRAP - : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>; -defm SUST_P_2D_ARRAY_V2I32_TRAP - : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>; +defm SUST_B_2D_ARRAY_V2I8_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", B16>; +defm SUST_B_2D_ARRAY_V2I16_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", B16>; +defm SUST_B_2D_ARRAY_V2I32_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", B32>; +defm SUST_B_2D_ARRAY_V2I64_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", B64>; + +defm SUST_B_2D_ARRAY_V2I8_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", B16>; +defm SUST_B_2D_ARRAY_V2I16_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", B16>; +defm SUST_B_2D_ARRAY_V2I32_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", B32>; +defm SUST_B_2D_ARRAY_V2I64_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", B64>; + +defm SUST_B_2D_ARRAY_V2I8_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", B16>; +defm SUST_B_2D_ARRAY_V2I16_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", B16>; +defm SUST_B_2D_ARRAY_V2I32_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", B32>; +defm SUST_B_2D_ARRAY_V2I64_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", B64>; + +defm SUST_P_2D_ARRAY_V2I8_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", B16>; +defm SUST_P_2D_ARRAY_V2I16_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", B16>; +defm SUST_P_2D_ARRAY_V2I32_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", B32>; class SUST_2D_ARRAY_V4_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + !con(surf, (ins B32:$idx, B32:$x, B32:$y, intype:$r, intype:$g, intype:$b, intype:$a)), inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};", pat>; multiclass SUST_2D_ARRAY_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_2D_ARRAY_V4_base; def _I : SUST_2D_ARRAY_V4_base; } -defm SUST_B_2D_ARRAY_V4I8_CLAMP - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>; -defm SUST_B_2D_ARRAY_V4I16_CLAMP - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>; -defm SUST_B_2D_ARRAY_V4I32_CLAMP - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>; - -defm SUST_B_2D_ARRAY_V4I8_TRAP - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>; -defm SUST_B_2D_ARRAY_V4I16_TRAP - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>; -defm SUST_B_2D_ARRAY_V4I32_TRAP - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>; - -defm SUST_B_2D_ARRAY_V4I8_ZERO - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>; -defm SUST_B_2D_ARRAY_V4I16_ZERO - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>; -defm SUST_B_2D_ARRAY_V4I32_ZERO - : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>; - -defm SUST_P_2D_ARRAY_V4I8_TRAP - : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>; -defm SUST_P_2D_ARRAY_V4I16_TRAP - : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>; -defm SUST_P_2D_ARRAY_V4I32_TRAP - : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>; +defm SUST_B_2D_ARRAY_V4I8_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", B16>; +defm SUST_B_2D_ARRAY_V4I16_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", B16>; +defm SUST_B_2D_ARRAY_V4I32_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", B32>; + +defm SUST_B_2D_ARRAY_V4I8_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", B16>; +defm SUST_B_2D_ARRAY_V4I16_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", B16>; +defm SUST_B_2D_ARRAY_V4I32_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", B32>; + +defm SUST_B_2D_ARRAY_V4I8_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", B16>; +defm SUST_B_2D_ARRAY_V4I16_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", B16>; +defm SUST_B_2D_ARRAY_V4I32_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", B32>; + +defm SUST_P_2D_ARRAY_V4I8_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", B16>; +defm SUST_P_2D_ARRAY_V4I16_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", B16>; +defm SUST_P_2D_ARRAY_V4I32_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", B32>; class SUST_3D_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + !con(surf, (ins B32:$x, B32:$y, B32:$z, intype:$r)), inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", pat>; multiclass SUST_3D { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_3D_base; def _I : SUST_3D_base; } -defm SUST_B_3D_I8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>; -defm SUST_B_3D_I16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>; -defm SUST_B_3D_I32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>; -defm SUST_B_3D_I64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>; +defm SUST_B_3D_I8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", B16>; +defm SUST_B_3D_I16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", B16>; +defm SUST_B_3D_I32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", B32>; +defm SUST_B_3D_I64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", B64>; -defm SUST_B_3D_I8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>; -defm SUST_B_3D_I16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>; -defm SUST_B_3D_I32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>; -defm SUST_B_3D_I64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>; +defm SUST_B_3D_I8_TRAP : SUST_3D<"sust.b.3d.b8.trap", B16>; +defm SUST_B_3D_I16_TRAP : SUST_3D<"sust.b.3d.b16.trap", B16>; +defm SUST_B_3D_I32_TRAP : SUST_3D<"sust.b.3d.b32.trap", B32>; +defm SUST_B_3D_I64_TRAP : SUST_3D<"sust.b.3d.b64.trap", B64>; -defm SUST_B_3D_I8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>; -defm SUST_B_3D_I16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>; -defm SUST_B_3D_I32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>; -defm SUST_B_3D_I64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>; +defm SUST_B_3D_I8_ZERO : SUST_3D<"sust.b.3d.b8.zero", B16>; +defm SUST_B_3D_I16_ZERO : SUST_3D<"sust.b.3d.b16.zero", B16>; +defm SUST_B_3D_I32_ZERO : SUST_3D<"sust.b.3d.b32.zero", B32>; +defm SUST_B_3D_I64_ZERO : SUST_3D<"sust.b.3d.b64.zero", B64>; -defm SUST_P_3D_I8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>; -defm SUST_P_3D_I16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>; -defm SUST_P_3D_I32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>; +defm SUST_P_3D_I8_TRAP : SUST_3D<"sust.p.3d.b8.trap", B16>; +defm SUST_P_3D_I16_TRAP : SUST_3D<"sust.p.3d.b16.trap", B16>; +defm SUST_P_3D_I32_TRAP : SUST_3D<"sust.p.3d.b32.trap", B32>; class SUST_3D_V2_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + !con(surf, (ins B32:$x, B32:$y, B32:$z, intype:$r, intype:$g)), inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};", pat>; multiclass SUST_3D_V2 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_3D_V2_base; def _I : SUST_3D_V2_base; } -defm SUST_B_3D_V2I8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>; -defm SUST_B_3D_V2I16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>; -defm SUST_B_3D_V2I32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>; -defm SUST_B_3D_V2I64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>; +defm SUST_B_3D_V2I8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", B16>; +defm SUST_B_3D_V2I16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", B16>; +defm SUST_B_3D_V2I32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", B32>; +defm SUST_B_3D_V2I64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", B64>; -defm SUST_B_3D_V2I8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>; -defm SUST_B_3D_V2I16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>; -defm SUST_B_3D_V2I32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>; -defm SUST_B_3D_V2I64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>; +defm SUST_B_3D_V2I8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", B16>; +defm SUST_B_3D_V2I16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", B16>; +defm SUST_B_3D_V2I32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", B32>; +defm SUST_B_3D_V2I64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", B64>; -defm SUST_B_3D_V2I8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>; -defm SUST_B_3D_V2I16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>; -defm SUST_B_3D_V2I32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>; -defm SUST_B_3D_V2I64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>; +defm SUST_B_3D_V2I8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", B16>; +defm SUST_B_3D_V2I16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", B16>; +defm SUST_B_3D_V2I32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", B32>; +defm SUST_B_3D_V2I64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", B64>; -defm SUST_P_3D_V2I8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>; -defm SUST_P_3D_V2I16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>; -defm SUST_P_3D_V2I32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>; +defm SUST_P_3D_V2I8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", B16>; +defm SUST_P_3D_V2I16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", B16>; +defm SUST_P_3D_V2I32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", B32>; class SUST_3D_V4_base pat> : NVPTXInst<(outs), - !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + !con(surf, (ins B32:$x, B32:$y, B32:$z, intype:$r, intype:$g, intype:$b, intype:$a)), inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};", pat>; multiclass SUST_3D_V4 { defvar intr = !cast("int_nvvm_" # !tolower(NAME)); - def _R : SUST_3D_V4_base; def _I : SUST_3D_V4_base; } -defm SUST_B_3D_V4I8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>; -defm SUST_B_3D_V4I16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>; -defm SUST_B_3D_V4I32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>; +defm SUST_B_3D_V4I8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", B16>; +defm SUST_B_3D_V4I16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", B16>; +defm SUST_B_3D_V4I32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", B32>; -defm SUST_B_3D_V4I8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>; -defm SUST_B_3D_V4I16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>; -defm SUST_B_3D_V4I32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>; +defm SUST_B_3D_V4I8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", B16>; +defm SUST_B_3D_V4I16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", B16>; +defm SUST_B_3D_V4I32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", B32>; -defm SUST_B_3D_V4I8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>; -defm SUST_B_3D_V4I16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>; -defm SUST_B_3D_V4I32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>; +defm SUST_B_3D_V4I8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", B16>; +defm SUST_B_3D_V4I16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", B16>; +defm SUST_B_3D_V4I32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", B32>; -defm SUST_P_3D_V4I8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>; -defm SUST_P_3D_V4I16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>; -defm SUST_P_3D_V4I32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>; +defm SUST_P_3D_V4I8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", B16>; +defm SUST_P_3D_V4I16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", B16>; +defm SUST_P_3D_V4I32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", B32>; } @@ -5027,13 +4363,13 @@ defm SUST_P_3D_V4I32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>; //----------------------------------- class PTX_READ_SREG_R64 Preds=[]> - : NVPTXInst<(outs Int64Regs:$d), (ins), + : NVPTXInst<(outs B64:$d), (ins), "mov.u64 \t$d, %" # regname # ";", [(set i64:$d, (intop))]>, Requires; class PTX_READ_SREG_R32 Preds=[]> - : NVPTXInst<(outs Int32Regs:$d), (ins), + : NVPTXInst<(outs B32:$d), (ins), "mov.u32 \t$d, %" # regname # ";", [(set i32:$d, (intop))]>, Requires; @@ -5072,18 +4408,12 @@ def INT_PTX_SREG_CLUSTER_NCTARANK: [hasSM<90>, hasPTX<78>]>; -def INT_PTX_SREG_LANEID : - PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; -def INT_PTX_SREG_WARPID : - PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; -def INT_PTX_SREG_NWARPID : - PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; -def INT_PTX_SREG_SMID : - PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; -def INT_PTX_SREG_NSMID : - PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; -def INT_PTX_SREG_GRIDID : - PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; +def SREG_LANEID : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; +def SREG_WARPID : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; +def SREG_NWARPID : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; +def SREG_SMID : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; +def SREG_NSMID : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; +def SREG_GRIDID : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; def INT_PTX_SREG_LANEMASK_EQ : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; @@ -5097,16 +4427,13 @@ def INT_PTX_SREG_LANEMASK_GT : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; let hasSideEffects = 1 in { -def INT_PTX_SREG_CLOCK : - PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; -def INT_PTX_SREG_CLOCK64 : - PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; -def INT_PTX_SREG_GLOBALTIMER : - PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>; +def SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; +def SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; +def SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>; } -def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>; -def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>; +def: Pat <(i64 (readcyclecounter)), (SREG_CLOCK64)>; +def: Pat <(i64 (readsteadycounter)), (SREG_GLOBALTIMER)>; def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; @@ -5116,7 +4443,7 @@ def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; // TODO: It would be nice to use PTX_READ_SREG here, but it doesn't // handle the constant. def INT_PTX_SREG_WARPSIZE : - NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", + NVPTXInst<(outs B32:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", [(set i32:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; // Helper class that represents a 'fragment' of an NVPTX *MMA instruction. @@ -5127,21 +4454,21 @@ class WMMA_REGINFO : WMMA_REGS { // NVPTX register types used to carry fragment data. NVPTXRegClass regclass = !cond( - !eq(ptx_elt_type, "f16") : Int32Regs, - !eq(ptx_elt_type, "f32") : Float32Regs, - !eq(ptx_elt_type, "f64") : Float64Regs, - !eq(ptx_elt_type, "bf16") : Int32Regs, - !eq(ptx_elt_type, "tf32") : Int32Regs, - !eq(ptx_elt_type, "s32") : Int32Regs, - !eq(ptx_elt_type, "b16") : Int32Regs, - !eq(ptx_elt_type, "b8") : Int32Regs, - !eq(ptx_elt_type, "b8x16.b6x16_p32") : Int32Regs, - !eq(ptx_elt_type, "b8x16.b4x16_p64") : Int32Regs, - !eq(ptx_elt_type, "s8") : Int32Regs, - !eq(ptx_elt_type, "u8") : Int32Regs, - !eq(ptx_elt_type, "s4") : Int32Regs, - !eq(ptx_elt_type, "u4") : Int32Regs, - !eq(ptx_elt_type, "b1") : Int32Regs); + !eq(ptx_elt_type, "f16") : B32, + !eq(ptx_elt_type, "f32") : B32, + !eq(ptx_elt_type, "f64") : B64, + !eq(ptx_elt_type, "bf16") : B32, + !eq(ptx_elt_type, "tf32") : B32, + !eq(ptx_elt_type, "s32") : B32, + !eq(ptx_elt_type, "b16") : B32, + !eq(ptx_elt_type, "b8") : B32, + !eq(ptx_elt_type, "b8x16.b6x16_p32") : B32, + !eq(ptx_elt_type, "b8x16.b4x16_p64") : B32, + !eq(ptx_elt_type, "s8") : B32, + !eq(ptx_elt_type, "u8") : B32, + !eq(ptx_elt_type, "s4") : B32, + !eq(ptx_elt_type, "u4") : B32, + !eq(ptx_elt_type, "b1") : B32); // Instruction input/output arguments for the fragment. list ptx_regs = !listsplat(regclass, !size(regs)); @@ -5284,7 +4611,7 @@ class WMMA_INSTR _Args> class WMMA_LOAD : WMMA_INSTR.record, [!con((ins ADDR:$src), - !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, + !if(WithStride, (ins B32:$ldm), (ins)))]>, Requires { // Load/store intrinsics are overloaded on pointer's address space. // To match the right intrinsic, we need to build AS-constrained PatFrag. @@ -5324,7 +4651,7 @@ class WMMA_STORE_D.record, [!con((ins ADDR:$dst), Frag.Ins, - !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, + !if(WithStride, (ins B32:$ldm), (ins)))]>, Requires { // Load/store intrinsics are overloaded on pointer's address space. @@ -5539,19 +4866,19 @@ foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in def : MMA_PAT; multiclass MAPA { - def _32: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b), + def _32: BasicNVPTXInst<(outs B32:$d), (ins B32:$a, B32:$b), "mapa" # suffix # ".u32", [(set i32:$d, (Intr i32:$a, i32:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; - def _32i: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b), + def _32i: BasicNVPTXInst<(outs B32:$d), (ins B32:$a, i32imm:$b), "mapa" # suffix # ".u32", [(set i32:$d, (Intr i32:$a, imm:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; - def _64: BasicNVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b), + def _64: BasicNVPTXInst<(outs B64:$d), (ins B64:$a, B32:$b), "mapa" # suffix # ".u64", [(set i64:$d, (Intr i64:$a, i32:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; - def _64i: BasicNVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b), + def _64i: BasicNVPTXInst<(outs B64:$d), (ins B64:$a, i32imm:$b), "mapa" # suffix # ".u64", [(set i64:$d, (Intr i64:$a, imm:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; @@ -5562,11 +4889,11 @@ defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluste multiclass GETCTARANK { - def _32: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), + def _32: BasicNVPTXInst<(outs B32:$d), (ins B32:$a), "getctarank" # suffix # ".u32", [(set i32:$d, (Intr i32:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; - def _64: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + def _64: BasicNVPTXInst<(outs B32:$d), (ins B64:$a), "getctarank" # suffix # ".u64", [(set i32:$d, (Intr i64:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; @@ -5575,7 +4902,7 @@ multiclass GETCTARANK { defm getctarank : GETCTARANK<"", int_nvvm_getctarank>; defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>; -def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins), +def is_explicit_cluster: NVPTXInst<(outs B1:$d), (ins), "mov.pred\t$d, %is_explicit_cluster;", [(set i1:$d, (int_nvvm_is_explicit_cluster))]>, Requires<[hasSM<90>, hasPTX<78>]>; @@ -5627,9 +4954,9 @@ let isConvergent = true in { multiclass TCGEN05_ALLOC_INTR { def "" : BasicNVPTXInst<(outs), - (ins ADDR:$dst, Int32Regs:$ncols), + (ins ADDR:$dst, B32:$ncols), "tcgen05.alloc.cta_group::" # num # ".sync.aligned" # AS # ".b32", - [(Intr addr:$dst, Int32Regs:$ncols)]>, + [(Intr addr:$dst, B32:$ncols)]>, Requires<[hasTcgen05Instructions]>; } @@ -5641,9 +4968,9 @@ defm TCGEN05_ALLOC_S64_CG2 : TCGEN05_ALLOC_INTR<".shared::cta", "2", int_nvvm_tc multiclass TCGEN05_DEALLOC_INTR { def "" : BasicNVPTXInst<(outs), - (ins Int32Regs:$tmem_addr, Int32Regs:$ncols), + (ins B32:$tmem_addr, B32:$ncols), "tcgen05.dealloc.cta_group::" # num # ".sync.aligned.b32", - [(Intr Int32Regs:$tmem_addr, Int32Regs:$ncols)]>, + [(Intr B32:$tmem_addr, B32:$ncols)]>, Requires<[hasTcgen05Instructions]>; } defm TCGEN05_DEALLOC_CG1: TCGEN05_DEALLOC_INTR<"1", int_nvvm_tcgen05_dealloc_cg1>; @@ -5677,9 +5004,9 @@ multiclass TCGEN05_COMMIT_INTR { prefix # ".b64", [(Intr addr:$mbar)]>, Requires<[hasTcgen05Instructions]>; - def _MC : BasicNVPTXInst<(outs), (ins ADDR:$mbar, Int16Regs:$mc), + def _MC : BasicNVPTXInst<(outs), (ins ADDR:$mbar, B16:$mc), prefix # ".multicast::cluster.b64", - [(IntrMC addr:$mbar, Int16Regs:$mc)]>, + [(IntrMC addr:$mbar, B16:$mc)]>, Requires<[hasTcgen05Instructions]>; } @@ -5711,14 +5038,14 @@ multiclass TCGEN05_CP_INTR { defvar IntrCG2 = !cast(intr_prefix # "_cg2"); def _cg1 : BasicNVPTXInst<(outs), - (ins ADDR:$tmem_addr, Int64Regs:$sdesc), + (ins ADDR:$tmem_addr, B64:$sdesc), "tcgen05.cp.cta_group::1." # shape_mc_asm # fmt_asm, - [(IntrCG1 addr:$tmem_addr, Int64Regs:$sdesc)]>, + [(IntrCG1 addr:$tmem_addr, B64:$sdesc)]>, Requires<[hasTcgen05Instructions]>; def _cg2 : BasicNVPTXInst<(outs), - (ins ADDR:$tmem_addr, Int64Regs:$sdesc), + (ins ADDR:$tmem_addr, B64:$sdesc), "tcgen05.cp.cta_group::2." # shape_mc_asm # fmt_asm, - [(IntrCG2 addr:$tmem_addr, Int64Regs:$sdesc)]>, + [(IntrCG2 addr:$tmem_addr, B64:$sdesc)]>, Requires<[hasTcgen05Instructions]>; } @@ -5757,7 +5084,7 @@ class TCGEN05_LDST_INST_NAME { // create a list of types for load/store operands - list regs = !listsplat(Int32Regs, Veclen); + list regs = !listsplat(B32, Veclen); // generate list of regnames for load/store operands list reg_names = !foreach(x, !range(0, Veclen), "r" # x); string regstring = "{{" # !interleave(!foreach(n, !range(0, Veclen), "$r" # n), ", ") # "}}"; @@ -5776,7 +5103,7 @@ class TCGEN05_LD_INST : TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE.veclen>; - let InOperandList = !con((ins Int32Regs:$taddr), + let InOperandList = !con((ins B32:$taddr), !if(!eq(Shape, "16x32bx2"), (ins i64imm:$offset), (ins))); let OutOperandList = Info.Outs; let AsmString = "tcgen05.ld.sync.aligned" @@ -5801,7 +5128,7 @@ class TCGEN05_ST_INST : TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO< NVVM_TCGEN05_LDST_ACCESS_SIZE.veclen>; - let InOperandList = !con((ins Int32Regs:$taddr), + let InOperandList = !con((ins B32:$taddr), !if(!eq(Shape, "16x32bx2"), (ins i64imm:$offset), (ins)), Info.Ins); let OutOperandList = (outs); @@ -5836,13 +5163,13 @@ foreach shape = ["16x64b", "16x128b", "16x256b", "32x32b", "16x32bx2"] in { def st_bulk_imm : TImmLeaf; def INT_NVVM_ST_BULK_GENERIC : - BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, Int64Regs:$size, i64imm:$value), + BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, B64:$size, i64imm:$value), "st.bulk", [(int_nvvm_st_bulk addr:$dest_addr, i64:$size, st_bulk_imm:$value)]>, Requires<[hasSM<100>, hasPTX<86>]>; def INT_NVVM_ST_BULK_SHARED_CTA: - BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, Int64Regs:$size, i64imm:$value), + BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, B64:$size, i64imm:$value), "st.bulk.shared::cta", [(int_nvvm_st_bulk_shared_cta addr:$dest_addr, i64:$size, st_bulk_imm:$value)]>, Requires<[hasSM<100>, hasPTX<86>]>; @@ -5870,7 +5197,7 @@ def clusterlaunchcontrol_query_cancel_is_canceled: SDTClusterLaunchControlQueryCancelIsCanceled, []>; def CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED: - NVPTXInst<(outs Int1Regs:$pred), (ins Int64Regs:$try_cancel_response0, Int64Regs:$try_cancel_response1), + NVPTXInst<(outs B1:$pred), (ins B64:$try_cancel_response0, B64:$try_cancel_response1), "{{\n\t" # ".reg .b128 %clc_handle;\n\t" # "mov.b128 %clc_handle, {$try_cancel_response0, $try_cancel_response1};\n\t" # @@ -5880,7 +5207,7 @@ def CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED: Requires<[hasSM<100>, hasPTX<86>]>; class CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID: - NVPTXInst<(outs Int32Regs:$reg), (ins Int64Regs:$try_cancel_response0, Int64Regs:$try_cancel_response1), + NVPTXInst<(outs B32:$reg), (ins B64:$try_cancel_response0, B64:$try_cancel_response1), "{{\n\t" # ".reg .b128 %clc_handle;\n\t" # "mov.b128 %clc_handle, {$try_cancel_response0, $try_cancel_response1};\n\t" # diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index eb60e1502cf90..aa07d510b3a12 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -25,9 +25,9 @@ using namespace llvm; namespace llvm { StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) { - if (RC == &NVPTX::Int128RegsRegClass) + if (RC == &NVPTX::B128RegClass) return ".b128"; - if (RC == &NVPTX::Int64RegsRegClass) + if (RC == &NVPTX::B64RegClass) // We use untyped (.b) integer registers here as NVCC does. // Correctness of generated code does not depend on register type, // but using .s/.u registers runs into ptxas bug that prevents @@ -47,11 +47,11 @@ StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) { // add.f16v2 rb32,rb32,rb32; // OK // add.f16v2 rs32,rs32,rs32; // OK return ".b64"; - if (RC == &NVPTX::Int32RegsRegClass) + if (RC == &NVPTX::B32RegClass) return ".b32"; - if (RC == &NVPTX::Int16RegsRegClass) + if (RC == &NVPTX::B16RegClass) return ".b16"; - if (RC == &NVPTX::Int1RegsRegClass) + if (RC == &NVPTX::B1RegClass) return ".pred"; if (RC == &NVPTX::SpecialRegsRegClass) return "!Special!"; @@ -59,15 +59,15 @@ StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) { } StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC) { - if (RC == &NVPTX::Int128RegsRegClass) + if (RC == &NVPTX::B128RegClass) return "%rq"; - if (RC == &NVPTX::Int64RegsRegClass) + if (RC == &NVPTX::B64RegClass) return "%rd"; - if (RC == &NVPTX::Int32RegsRegClass) + if (RC == &NVPTX::B32RegClass) return "%r"; - if (RC == &NVPTX::Int16RegsRegClass) + if (RC == &NVPTX::B16RegClass) return "%rs"; - if (RC == &NVPTX::Int1RegsRegClass) + if (RC == &NVPTX::B1RegClass) return "%p"; if (RC == &NVPTX::SpecialRegsRegClass) return "!Special!"; diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td index 2eea9e9721cdf..9fac97d97c609 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -55,23 +55,15 @@ foreach i = 0...31 in { //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// -def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; -def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>; -def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8, f32], 32, +def B1 : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; +def B16 : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>; +def B32 : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8, f32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>; -def Int64Regs : NVPTXRegClass<[i64, f64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; +def B64 : NVPTXRegClass<[i64, f64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; // 128-bit regs are not defined as general regs in NVPTX. They are used for inlineASM only. -def Int128Regs : NVPTXRegClass<[i128], 128, (add (sequence "RQ%u", 0, 4))>; - -def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>; -def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 4))>; -def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>; -def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>; +def B128 : NVPTXRegClass<[i128], 128, (add (sequence "RQ%u", 0, 4))>; // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame32, VRFrameLocal32, VRDepot, (sequence "ENVREG%u", 0, 31))>; - -defvar Float32Regs = Int32Regs; -defvar Float64Regs = Int64Regs; diff --git a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir index ef8394005943c..08b89059f80bd 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir @@ -12,8 +12,8 @@ --- name: test registers: - - { id: 0, class: int32regs } - - { id: 1, class: int32regs } + - { id: 0, class: b32 } + - { id: 1, class: b32 } body: | bb.0.entry: %0 = LD_i32 0, 4, 1, 2, 32, &test_param_0, 0 diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir index 146a45a9b1c20..bb36b1df115d1 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir @@ -30,24 +30,24 @@ --- name: test registers: - - { id: 0, class: int32regs } - - { id: 1, class: int64regs } - - { id: 2, class: int32regs } - - { id: 3, class: int64regs } - - { id: 4, class: int32regs } - - { id: 5, class: int32regs } - - { id: 6, class: int32regs } - - { id: 7, class: int32regs } + - { id: 0, class: b32 } + - { id: 1, class: b64 } + - { id: 2, class: b32 } + - { id: 3, class: b64 } + - { id: 4, class: b32 } + - { id: 5, class: b32 } + - { id: 6, class: b32 } + - { id: 7, class: b32 } body: | bb.0.entry: %0 = LD_i32 0, 0, 4, 2, 32, &test_param_0, 0 %1 = CVT_f64_f32 %0, 0 %2 = LD_i32 0, 0, 4, 0, 32, &test_param_1, 0 - ; CHECK: %3:int64regs = FADD_rnf64ri %1, double 3.250000e+00 + ; CHECK: %3:b64 = FADD_rnf64ri %1, double 3.250000e+00 %3 = FADD_rnf64ri %1, double 3.250000e+00 %4 = CVT_f32_f64 %3, 5 %5 = CVT_f32_s32 %2, 5 - ; CHECK: %6:int32regs = FADD_rnf32ri %5, float 6.250000e+00 + ; CHECK: %6:b32 = FADD_rnf32ri %5, float 6.250000e+00 %6 = FADD_rnf32ri %5, float 6.250000e+00 %7 = FMUL_rnf32rr %6, %4 StoreRetvalI32 %7, 0 @@ -56,24 +56,24 @@ body: | --- name: test2 registers: - - { id: 0, class: int32regs } - - { id: 1, class: int64regs } - - { id: 2, class: int32regs } - - { id: 3, class: int64regs } - - { id: 4, class: int32regs } - - { id: 5, class: int32regs } - - { id: 6, class: int32regs } - - { id: 7, class: int32regs } + - { id: 0, class: b32 } + - { id: 1, class: b64 } + - { id: 2, class: b32 } + - { id: 3, class: b64 } + - { id: 4, class: b32 } + - { id: 5, class: b32 } + - { id: 6, class: b32 } + - { id: 7, class: b32 } body: | bb.0.entry: %0 = LD_i32 0, 0, 4, 2, 32, &test2_param_0, 0 %1 = CVT_f64_f32 %0, 0 %2 = LD_i32 0, 0, 4, 0, 32, &test2_param_1, 0 - ; CHECK: %3:int64regs = FADD_rnf64ri %1, double 0x7FF8000000000000 + ; CHECK: %3:b64 = FADD_rnf64ri %1, double 0x7FF8000000000000 %3 = FADD_rnf64ri %1, double 0x7FF8000000000000 %4 = CVT_f32_f64 %3, 5 %5 = CVT_f32_s32 %2, 5 - ; CHECK: %6:int32regs = FADD_rnf32ri %5, float 0x7FF8000000000000 + ; CHECK: %6:b32 = FADD_rnf32ri %5, float 0x7FF8000000000000 %6 = FADD_rnf32ri %5, float 0x7FF8000000000000 %7 = FMUL_rnf32rr %6, %4 StoreRetvalI32 %7, 0 diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir index c5bed1244d50e..71108f8b37175 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir @@ -12,8 +12,8 @@ --- name: test registers: - - { id: 0, class: int32regs } - - { id: 1, class: int32regs } + - { id: 0, class: b32 } + - { id: 1, class: b32 } body: | bb.0.entry: %0 = LD_i32 0, 4, 1, 2, 32, &test_param_0, 0 diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir index b09d889815db7..4d80d52de8da8 100644 --- a/llvm/test/CodeGen/NVPTX/branch-fold.mir +++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir @@ -33,12 +33,12 @@ name: hoge alignment: 1 tracksRegLiveness: true registers: - - { id: 0, class: int64regs } - - { id: 1, class: int64regs } - - { id: 2, class: int1regs } - - { id: 3, class: int64regs } - - { id: 4, class: int1regs } - - { id: 5, class: int64regs } + - { id: 0, class: b64 } + - { id: 1, class: b64 } + - { id: 2, class: b1 } + - { id: 3, class: b64 } + - { id: 4, class: b1 } + - { id: 5, class: b64 } frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -47,18 +47,18 @@ body: | ; CHECK: bb.0.bb: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBranch undef %2:int1regs, %bb.3 + ; CHECK-NEXT: CBranch undef %2:b1, %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.bb1.preheader: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:int64regs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:b64 = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.bb1: ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:int64regs = ADDi64ri [[ADDi64ri]], 1 - ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:int1regs = SETP_s64ri [[ADDi64ri]], 1, 2 + ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:b64 = ADDi64ri [[ADDi64ri]], 1 + ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:b1 = SETP_s64ri [[ADDi64ri]], 1, 2 ; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb4: @@ -68,16 +68,16 @@ body: | bb.0.bb: successors: %bb.1, %bb.3 - CBranch undef %2:int1regs, %bb.3 + CBranch undef %2:b1, %bb.3 bb.1.bb1.preheader: - %5:int64regs = IMPLICIT_DEF + %5:b64 = IMPLICIT_DEF bb.2.bb1: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - %5:int64regs = ADDi64ri %5, 1 - %4:int1regs = SETP_s64ri %5, 1, 2 + %5:b64 = ADDi64ri %5, 1 + %4:b1 = SETP_s64ri %5, 1, 2 CBranch %4, %bb.2 bb.3.bb4: diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir index c2c87b6b24285..a1d8d0590f160 100644 --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir @@ -33,18 +33,18 @@ debugInstrRef: false failsVerification: false tracksDebugUserValues: false registers: - - { id: 0, class: int32regs, preferred-register: '' } - - { id: 1, class: int32regs, preferred-register: '' } - - { id: 2, class: int32regs, preferred-register: '' } - - { id: 3, class: int32regs, preferred-register: '' } - - { id: 4, class: int32regs, preferred-register: '' } - - { id: 5, class: int32regs, preferred-register: '' } - - { id: 6, class: int32regs, preferred-register: '' } - - { id: 7, class: int32regs, preferred-register: '' } - - { id: 8, class: int32regs, preferred-register: '' } - - { id: 9, class: int32regs, preferred-register: '' } - - { id: 10, class: int32regs, preferred-register: '' } - - { id: 11, class: int32regs, preferred-register: '' } + - { id: 0, class: b32, preferred-register: '' } + - { id: 1, class: b32, preferred-register: '' } + - { id: 2, class: b32, preferred-register: '' } + - { id: 3, class: b32, preferred-register: '' } + - { id: 4, class: b32, preferred-register: '' } + - { id: 5, class: b32, preferred-register: '' } + - { id: 6, class: b32, preferred-register: '' } + - { id: 7, class: b32, preferred-register: '' } + - { id: 8, class: b32, preferred-register: '' } + - { id: 9, class: b32, preferred-register: '' } + - { id: 10, class: b32, preferred-register: '' } + - { id: 11, class: b32, preferred-register: '' } liveins: [] frameInfo: isFrameAddressTaken: false @@ -77,20 +77,20 @@ constants: [] machineFunctionInfo: {} body: | bb.0: - %0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0 + %0:b32, %1:b32, %2:b32, %3:b32 = LoadParamMemV4I32 0 ; CHECK-NOT: ProxyReg - %4:int32regs = ProxyRegB32 killed %0 - %5:int32regs = ProxyRegB32 killed %1 - %6:int32regs = ProxyRegB32 killed %2 - %7:int32regs = ProxyRegB32 killed %3 + %4:b32 = ProxyRegB32 killed %0 + %5:b32 = ProxyRegB32 killed %1 + %6:b32 = ProxyRegB32 killed %2 + %7:b32 = ProxyRegB32 killed %3 ; CHECK: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3 StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0 - %8:int32regs = LoadParamMemI32 0 + %8:b32 = LoadParamMemI32 0 ; CHECK-NOT: ProxyReg - %9:int32regs = ProxyRegB32 killed %8 - %10:int32regs = ProxyRegB32 killed %9 - %11:int32regs = ProxyRegB32 killed %10 + %9:b32 = ProxyRegB32 killed %8 + %10:b32 = ProxyRegB32 killed %9 + %11:b32 = ProxyRegB32 killed %10 ; CHECK: StoreRetvalI32 killed %8 StoreRetvalI32 killed %11, 0 Return diff --git a/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll b/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll index 7a42268650c63..6055a49c98526 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll @@ -13,7 +13,7 @@ entry: ; } ; ; CHECK-LABEL: Machine code for function test1 - ; CHECK: DBG_VALUE %[[#]]:int32regs, $noreg, !"xyz", !DIExpression(), debug-location ![[#]]; test.cu:2 line no:6 + ; CHECK: DBG_VALUE %[[#]]:b32, $noreg, !"xyz", !DIExpression(), debug-location ![[#]]; test.cu:2 line no:6 ; %cmp = icmp eq i32 %gid, 0, !dbg !12 %conv = zext i1 %cmp to i32, !dbg !12 @@ -35,7 +35,7 @@ entry: ; } ; ; CHECK-LABEL: Machine code for function test2 - ; CHECK: DBG_VALUE %[[#]]:int32regs, $noreg, !"abc", !DIExpression(), debug-location ![[#]]; test.cu:12 line no:11 + ; CHECK: DBG_VALUE %[[#]]:b32, $noreg, !"abc", !DIExpression(), debug-location ![[#]]; test.cu:12 line no:11 ; %cmp = icmp eq i32 %gid, 0, !dbg !17 %conv = zext i1 %cmp to i32, !dbg !17