diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 0b26bb9829005..a95c739f1331d 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -357,38 +357,33 @@ class MMA_SIGNATURE { !ne(A.ptx_elt_type, B.ptx_elt_type): [A, B], true: [A] ); - string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type)); + string ret = !foldl("", id_frags, a, b, !strconcat(a, "_", b.ptx_elt_type)); } class WMMA_NAME { string signature = MMA_SIGNATURE.ret; - string llvm = "llvm.nvvm.wmma." - # A.geom - # ".mma" - # b1op - # "." # ALayout - # "." # BLayout - # !if(!ne(Rnd, ""), !strconcat(".", Rnd), "") - # signature - # !if(Satfinite, ".satfinite", ""); - - string record = !subst(".", "_", - !subst("llvm.", "int_", llvm)); + string record = "int_nvvm_wmma_" + # A.geom + # "_mma" + # !subst(".", "_", b1op) + # "_" # ALayout + # "_" # BLayout + # !if(!ne(Rnd, ""), !strconcat("_", Rnd), "") + # signature + # !if(Satfinite, "_satfinite", ""); } class MMA_NAME { string signature = MMA_SIGNATURE.ret; - string llvm = "llvm.nvvm.mma" - # b1op - # "." # A.geom - # "." # ALayout - # "." # BLayout - # !if(Satfinite, ".satfinite", "") - # signature; - string record = !subst(".", "_", - !subst("llvm.", "int_", llvm)); + string record = "int_nvvm_mma" + # !subst(".", "_", b1op) + # "_" # A.geom + # "_" # ALayout + # "_" # BLayout + # !if(Satfinite, "_satfinite", "") + # signature; } class LDMATRIX_NAME { @@ -696,101 +691,6 @@ class SHFL_INFO { [OpType, llvm_i32_ty, llvm_i32_ty]); } -class CP_ASYNC_BULK_TENSOR_G2S_INTR { - string Name = "int_nvvm_cp_async_bulk_tensor_g2s_" # mode # "_" # dim # "d"; - - bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0); - int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0); - list Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets); - list TensorDimsTy = !listsplat(llvm_i32_ty, dim); - list ArgsTy = !listconcat( - [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr - llvm_shared_ptr_ty, // mbarrier_smem_ptr - llvm_ptr_ty], // tensormap_ptr - TensorDimsTy, // actual tensor dims - Im2ColOffsetsTy, // im2col offsets - [llvm_i16_ty, // cta_mask - llvm_i64_ty, // cache_hint - llvm_i1_ty, // Flag for cta_mask - llvm_i1_ty] // Flag for cache_hint - ); - - int TempFlagsStartIdx = !add(dim, 5); - int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets); - list IntrProp = [IntrConvergent, - WriteOnly>, ReadOnly>, - NoCapture>, NoCapture>, NoCapture>, - ImmArg>, - ImmArg>]; -} - -class CP_ASYNC_BULK_TENSOR_S2G_INTR { - string Name = "int_nvvm_cp_async_bulk_tensor_s2g_" # mode # "_" # dim # "d"; - - list TensorDimsTy = !listsplat(llvm_i32_ty, dim); - list ArgsTy = !listconcat( - [llvm_shared_ptr_ty, // src_smem_ptr - llvm_ptr_ty], // tensormap_ptr - TensorDimsTy, // actual tensor dims - [llvm_i64_ty, // cache_hint - llvm_i1_ty] // Flag for cache_hint - ); - int FlagsStartIdx = !add(dim, 3); - list IntrProp = [IntrConvergent, - ReadOnly>, ReadOnly>, - NoCapture>, NoCapture>, - ImmArg>]; -} - -class CP_ASYNC_BULK_TENSOR_PREFETCH_INTR { - string Name = "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # "d"; - - bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0); - int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0); - list Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets); - list TensorDimsTy = !listsplat(llvm_i32_ty, dim); - list ArgsTy = !listconcat( - [llvm_ptr_ty], // tensormap_ptr - TensorDimsTy, // actual tensor dims - Im2ColOffsetsTy, // im2col offsets - [llvm_i64_ty, // cache_hint - llvm_i1_ty] // Flag for cache_hint - ); - - int TempFlagsStartIdx = !add(dim, 2); - int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets); - list IntrProp = [IntrConvergent, - ReadOnly>, NoCapture>, - ImmArg>]; -} - -class CP_ASYNC_BULK_TENSOR_REDUCE_INTR { - string Suffix = op # "_" # mode # "_" # dim # "d"; - string Name = "int_nvvm_cp_async_bulk_tensor_reduce_" # Suffix; - - list TensorDimsTy = !listsplat(llvm_i32_ty, dim); - list ArgsTy = !listconcat( - [llvm_shared_ptr_ty, // src_smem_ptr - llvm_ptr_ty], // tensormap_ptr - TensorDimsTy, // actual tensor dims - [llvm_i64_ty, // cache_hint - llvm_i1_ty] // Flag for cache_hint - ); - int FlagsStartIdx = !add(dim, 3); - list IntrProp = [IntrConvergent, - ReadOnly>, ReadOnly>, - NoCapture>, NoCapture>, - ImmArg>]; -} - -class NVVM_TCGEN05_LDST_NAME { - string intr = "llvm.nvvm.tcgen05." # Op - # "." # Shape - # "." # "x" # !shl(1, Num); - - string record = !subst(".", "_", - !subst("llvm.", "int_", intr)); -} class NVVM_TCGEN05_LDST_ACCESS_SIZE { int shift = !cond(!eq(Shape, "16x128b"): 1, !eq(Shape, "16x256b"): 2, @@ -810,6 +710,28 @@ class NVVM_TCGEN05_LDST_ACCESS_SIZE { true : llvm_void_ty); } +class TexVector types> { + string Name = name; + list Types = types; +} + +def TV_I8 : TexVector<"i8", [llvm_i16_ty]>; +def TV_I16 : TexVector<"i16", [llvm_i16_ty]>; +def TV_I32 : TexVector<"i32", [llvm_i32_ty]>; +def TV_I64 : TexVector<"i64", [llvm_i64_ty]>; +def TV_V2I8 : TexVector<"v2i8", !listsplat(llvm_i16_ty, 2)>; +def TV_V2I16 : TexVector<"v2i16", !listsplat(llvm_i16_ty, 2)>; +def TV_V2I32 : TexVector<"v2i32", !listsplat(llvm_i32_ty, 2)>; +def TV_V2I64 : TexVector<"v2i64", !listsplat(llvm_i64_ty, 2)>; +def TV_V4I8 : TexVector<"v4i8", !listsplat(llvm_i16_ty, 4)>; +def TV_V4I16 : TexVector<"v4i16", !listsplat(llvm_i16_ty, 4)>; +def TV_V4I32 : TexVector<"v4i32", !listsplat(llvm_i32_ty, 4)>; + + +def V4F32 : TexVector<"v4f32", !listsplat(llvm_float_ty, 4)>; +def V4S32 : TexVector<"v4s32", !listsplat(llvm_i32_ty, 4)>; +def V4U32 : TexVector<"v4u32", !listsplat(llvm_i32_ty, 4)>; + class NVVMBuiltin : ClangBuiltin { assert !eq(!substr(NAME, 0, !size("int_nvvm_")), "int_nvvm_"), @@ -828,131 +750,116 @@ let TargetPrefix = "nvvm" in { // // Min Max // + let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in { + foreach operation = ["min", "max"] in { + def int_nvvm_f # operation # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; - foreach operation = ["min", "max"] in { - def int_nvvm_f # operation # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + foreach variant = ["", "_xorsign_abs"] in { + foreach nan = ["", "_nan"] in { + foreach ftz = ["", "_ftz"] in { + def int_nvvm_f # operation # ftz # nan # variant # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; - foreach variant = ["", "_xorsign_abs"] in { - foreach nan = ["", "_nan"] in { - foreach ftz = ["", "_ftz"] in { - def int_nvvm_f # operation # ftz # nan # variant # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + def int_nvvm_f # operation # ftz # nan # variant # _f16 : + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty]>; - def int_nvvm_f # operation # ftz # nan # variant # _f16 : - DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + def int_nvvm_f # operation # ftz # nan # variant # _f16x2 : + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty]>; - def int_nvvm_f # operation # ftz # nan # variant # _f16x2 : - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + def int_nvvm_f # operation # ftz # nan # variant # _bf16 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty]>; - def int_nvvm_f # operation # ftz # nan # variant # _bf16 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; - - def int_nvvm_f # operation # ftz # nan # variant # _bf16x2 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; - } // ftz - } // nan - } // variant - } // operation + def int_nvvm_f # operation # ftz # nan # variant # _bf16x2 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty]>; + } // ftz + } // nan + } // variant + } // operation + } // // Multiplication // + let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in { + foreach sign = ["", "u"] in { + def int_nvvm_mulhi_ # sign # s : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty]>; - foreach sign = ["", "u"] in { - def int_nvvm_mulhi_ # sign # s : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; - - def int_nvvm_mulhi_ # sign # i : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + def int_nvvm_mulhi_ # sign # i : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; - def int_nvvm_mulhi_ # sign # ll : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + def int_nvvm_mulhi_ # sign # ll : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; - def int_nvvm_mul24_ # sign # i : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; - } + def int_nvvm_mul24_ # sign # i : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; + } - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_mul_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_mul_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_mul_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + def int_nvvm_mul_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; + } } // // Div // + let IntrProperties = [IntrNoMem] in { + foreach ftz = ["", "_ftz"] in { + def int_nvvm_div_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in { - def int_nvvm_div_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem]>; - - def int_nvvm_div_full # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem]>; - } + def int_nvvm_div_full # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; + } - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_div_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem]>; + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_div_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_div_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem]>; + def int_nvvm_div_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; + } } // // Sad // + let IntrProperties = [IntrNoMem, Commutative, IntrSpeculatable] in { + foreach sign = ["", "u"] in { + def int_nvvm_sad_ # sign # s : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty]>; - foreach sign = ["", "u"] in { - def int_nvvm_sad_ # sign # s : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [IntrNoMem, Commutative, IntrSpeculatable]>; - - def int_nvvm_sad_ # sign # i : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative, IntrSpeculatable]>; + def int_nvvm_sad_ # sign # i : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; - def int_nvvm_sad_ # sign # ll : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], - [IntrNoMem, Commutative, IntrSpeculatable]>; + def int_nvvm_sad_ # sign # ll : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>; + } } // // Floor Ceil // - - foreach op = ["floor", "ceil"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_ # op # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ # op # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + let IntrProperties = [IntrNoMem, IntrSpeculatable] in { + foreach op = ["floor", "ceil"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_ # op # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; + def int_nvvm_ # op # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } } // // Abs // - foreach ftz = ["", "_ftz"] in def int_nvvm_fabs # ftz : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], @@ -961,7 +868,6 @@ let TargetPrefix = "nvvm" in { // // Abs, Neg bf16, bf16x2 // - def int_nvvm_neg_bf16 : NVVMBuiltin, DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty], [IntrNoMem]>; def int_nvvm_neg_bf16x2 : NVVMBuiltin, @@ -970,62 +876,65 @@ let TargetPrefix = "nvvm" in { // // Round // + let IntrProperties = [IntrNoMem, IntrSpeculatable] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_round # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_round # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - - def int_nvvm_round_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_round_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } // // Trunc // + let IntrProperties = [IntrNoMem, IntrSpeculatable] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_trunc # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_trunc # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - - def int_nvvm_trunc_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_trunc_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } // // Saturate // + let IntrProperties = [IntrNoMem, IntrSpeculatable] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_saturate # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_saturate # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - - def int_nvvm_saturate_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_saturate_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } // // Exp2 Log2 // + let IntrProperties = [IntrNoMem] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_ex2_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_ex2_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - - def int_nvvm_ex2_approx_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_nvvm_ex2_approx_f16 : - DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>; - def int_nvvm_ex2_approx_f16x2 : - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>; + def int_nvvm_ex2_approx_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + def int_nvvm_ex2_approx_f16 : + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty]>; + def int_nvvm_ex2_approx_f16x2 : + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_lg2_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + foreach ftz = ["", "_ftz"] in + def int_nvvm_lg2_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_lg2_approx_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + def int_nvvm_lg2_approx_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } // // Sin Cos // - foreach op = ["sin", "cos"] in foreach ftz = ["", "_ftz"] in def int_nvvm_ # op # _approx # ftz # _f : NVVMBuiltin, @@ -1034,105 +943,103 @@ let TargetPrefix = "nvvm" in { // // Fma // + let IntrProperties = [IntrNoMem, IntrSpeculatable] in { + foreach variant = ["", "_sat", "_relu"] in { + foreach ftz = ["", "_ftz"] in { + def int_nvvm_fma_rn # ftz # variant # _f16 : + DefaultAttrsIntrinsic<[llvm_half_ty], + [llvm_half_ty, llvm_half_ty, llvm_half_ty]>; + + def int_nvvm_fma_rn # ftz # variant # _f16x2 : + DefaultAttrsIntrinsic<[llvm_v2f16_ty], + [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty]>; + + def int_nvvm_fma_rn # ftz # variant # _bf16 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_bfloat_ty], + [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty]>; + + def int_nvvm_fma_rn # ftz # variant # _bf16x2 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2bf16_ty], + [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty]>; + } // ftz + } // variant - foreach variant = ["", "_sat", "_relu"] in { - foreach ftz = ["", "_ftz"] in { - def int_nvvm_fma_rn # ftz # variant # _f16 : - DefaultAttrsIntrinsic<[llvm_half_ty], - [llvm_half_ty, llvm_half_ty, llvm_half_ty], - [IntrNoMem, IntrSpeculatable]>; - - def int_nvvm_fma_rn # ftz # variant # _f16x2 : - DefaultAttrsIntrinsic<[llvm_v2f16_ty], - [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty], - [IntrNoMem, IntrSpeculatable]>; - - def int_nvvm_fma_rn # ftz # variant # _bf16 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_bfloat_ty], - [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty], - [IntrNoMem, IntrSpeculatable]>; - - def int_nvvm_fma_rn # ftz # variant # _bf16x2 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2bf16_ty], - [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty], - [IntrNoMem, IntrSpeculatable]>; - } // ftz - } // variant - - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_fma_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable]>; + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_fma_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_float_ty, llvm_float_ty]>; - def int_nvvm_fma_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], - [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_fma_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], + [llvm_double_ty, llvm_double_ty, llvm_double_ty]>; + } } // // Rcp // + let IntrProperties = [IntrNoMem] in { + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_rcp_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_rcp_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_nvvm_rcp_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } - def int_nvvm_rcp_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + def int_nvvm_rcp_approx_ftz_f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; + def int_nvvm_rcp_approx_ftz_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; } - def int_nvvm_rcp_approx_ftz_f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rcp_approx_ftz_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - // // Sqrt // - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_sqrt_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + let IntrProperties = [IntrNoMem] in { + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_sqrt_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_sqrt_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - } + def int_nvvm_sqrt_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } - def int_nvvm_sqrt_f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_nvvm_sqrt_f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_sqrt_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + foreach ftz = ["", "_ftz"] in + def int_nvvm_sqrt_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; + } // // Rsqrt // - - foreach ftz = ["", "_ftz"] in { - def int_nvvm_rsqrt_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_rsqrt_approx # ftz # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + let IntrProperties = [IntrNoMem] in { + foreach ftz = ["", "_ftz"] in { + def int_nvvm_rsqrt_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; + def int_nvvm_rsqrt_approx # ftz # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + } } // // Add // - - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_add_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in { + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_add_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; def int_nvvm_add_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; + } } // @@ -1191,135 +1098,134 @@ let TargetPrefix = "nvvm" in { // // Convert // + let IntrProperties = [IntrNoMem, IntrSpeculatable] in { + def int_nvvm_lohi_i2d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty]>; - def int_nvvm_lohi_i2d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; - - def int_nvvm_d2i_lo : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2i_hi : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_d2i_lo : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>; + def int_nvvm_d2i_hi : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>; + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_d2f_ # rnd # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty]>; - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_d2f_ # rnd # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + foreach sign = ["", "u"] in { - foreach sign = ["", "u"] in { + def int_nvvm_d2 # sign # i_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>; - def int_nvvm_d2 # sign # i_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_ # sign # i2d_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty]>; - def int_nvvm_ # sign # i2d_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + foreach ftz = ["", "_ftz"] in + def int_nvvm_f2 # sign # i_ # rnd # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_f2 # sign # i_ # rnd # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_ # sign # i2f_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty]>; - def int_nvvm_ # sign # i2f_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; + foreach ftz = ["", "_ftz"] in + def int_nvvm_f2 # sign # ll_ # rnd # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_f2 # sign # ll_ # rnd # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_d2 # sign # ll_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty]>; - def int_nvvm_d2 # sign # ll_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_ # sign # ll2f_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty]>; - def int_nvvm_ # sign # ll2f_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_ # sign # ll2d_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty]>; - def int_nvvm_ # sign # ll2d_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - } // sign - } // rnd + } // sign + } // rnd - foreach ftz = ["", "_ftz"] in { - def int_nvvm_f2h_rn # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; + foreach ftz = ["", "_ftz"] in { + def int_nvvm_f2h_rn # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty]>; - def int_nvvm_bf2h_rn # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_bf2h_rn # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty]>; + } } + let IntrProperties = [IntrNoMem, IntrNoCallback] in { + foreach rnd = ["rn", "rz"] in { + foreach relu = ["", "_relu"] in { + def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin, + Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>; - foreach rnd = ["rn", "rz"] in { - foreach relu = ["", "_relu"] in { - def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin, - Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - - def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin, + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin, - Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin, + Intrinsic<[llvm_bfloat_ty], [llvm_float_ty]>; + } } - } - - foreach satfinite = ["", "_satfinite"] in { - def int_nvvm_f2tf32_rna # satfinite : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + foreach satfinite = ["", "_satfinite"] in { + def int_nvvm_f2tf32_rna # satfinite : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_float_ty]>; - foreach rnd = ["rn", "rz"] in - foreach relu = ["", "_relu"] in - def int_nvvm_f2tf32_ # rnd # relu # satfinite : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - } + foreach rnd = ["rn", "rz"] in + foreach relu = ["", "_relu"] in + def int_nvvm_f2tf32_ # rnd # relu # satfinite : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_float_ty]>; + } - foreach type = ["e4m3x2", "e5m2x2"] in { - foreach relu = ["", "_relu"] in { - def int_nvvm_ff_to_ # type # _rn # relu : NVVMBuiltin, - Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + foreach type = ["e4m3x2", "e5m2x2"] in { + foreach relu = ["", "_relu"] in { + def int_nvvm_ff_to_ # type # _rn # relu : NVVMBuiltin, + Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_f16x2_to_ # type # _rn # relu : NVVMBuiltin, - Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f16x2_to_ # type # _rn # relu : NVVMBuiltin, + Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>; - def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, - Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, + Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; + } } - } - // FP6 conversions. - foreach type = ["e2m3x2", "e3m2x2"] in { + // FP4 conversions. foreach relu = ["", "_relu"] in { - def int_nvvm_ff_to_ # type # _rn # relu # _satfinite : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ff_to_e2m1x2_rn # relu # _satfinite : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; } - } - // FP4 conversions. - foreach relu = ["", "_relu"] in { - def int_nvvm_ff_to_e2m1x2_rn # relu # _satfinite : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + // FP6 conversions. + foreach type = ["e2m3x2", "e3m2x2"] in { + foreach relu = ["", "_relu"] in { + def int_nvvm_ff_to_ # type # _rn # relu # _satfinite : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; - } - - // UE8M0x2 conversions. - foreach rmode = ["_rz", "_rp"] in { - foreach satmode = ["", "_satfinite"] in { - defvar suffix = rmode # satmode; - def int_nvvm_ff_to_ue8m0x2 # suffix : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; + } + } + + // UE8M0x2 conversions. + foreach rmode = ["_rz", "_rp"] in { + foreach satmode = ["", "_satfinite"] in { + defvar suffix = rmode # satmode; + def int_nvvm_ff_to_ue8m0x2 # suffix : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_bf16x2_to_ue8m0x2 # suffix : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_bf16x2_to_ue8m0x2 # suffix : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty]>; + } } - } - def int_nvvm_ue8m0x2_to_bf16x2 : NVVMBuiltin, - Intrinsic<[llvm_v2bf16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_ue8m0x2_to_bf16x2 : NVVMBuiltin, + Intrinsic<[llvm_v2bf16_ty], [llvm_i16_ty]>; + } // FNS - def int_nvvm_fns : NVVMBuiltin, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -1423,14 +1329,16 @@ foreach scope = ["cta", "cluster", "gpu", "sys"] in { } // Async Copy -def int_nvvm_cp_async_mbarrier_arrive : NVVMBuiltin, - Intrinsic<[], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>; -def int_nvvm_cp_async_mbarrier_arrive_shared : NVVMBuiltin, - Intrinsic<[], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>; -def int_nvvm_cp_async_mbarrier_arrive_noinc : NVVMBuiltin, - Intrinsic<[], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>; -def int_nvvm_cp_async_mbarrier_arrive_noinc_shared : NVVMBuiltin, - Intrinsic<[], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>; +let IntrProperties = [IntrConvergent, IntrNoCallback] in { + def int_nvvm_cp_async_mbarrier_arrive : NVVMBuiltin, + Intrinsic<[],[llvm_ptr_ty]>; + def int_nvvm_cp_async_mbarrier_arrive_shared : NVVMBuiltin, + Intrinsic<[],[llvm_shared_ptr_ty]>; + def int_nvvm_cp_async_mbarrier_arrive_noinc : NVVMBuiltin, + Intrinsic<[],[llvm_ptr_ty]>; + def int_nvvm_cp_async_mbarrier_arrive_noinc_shared : NVVMBuiltin, + Intrinsic<[],[llvm_shared_ptr_ty]>; +} multiclass CP_ASYNC_SHARED_GLOBAL { def NAME : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty], @@ -1508,15 +1416,11 @@ def int_nvvm_mbarrier_pending_count : NVVMBuiltin, // Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the // pointer's alignment. -def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], - [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>]>; -def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], - [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>]>; -def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>]>; +let IntrProperties = [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>] in { + def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i32_ty]>; + def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], [llvm_anyptr_ty, llvm_i32_ty]>; + def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty]>; +} // Represents an explicit hole in the LLVM IR type system. It may be inserted by // the compiler in cases where a pointer is of the wrong type. In the backend @@ -1550,8 +1454,8 @@ def int_nvvm_texsurf_handle_internal : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty], [IntrNoMem]>; /// Error / Warn -def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty], []>; -def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty], []>; +def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty]>; +def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty]>; def int_nvvm_reflect : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; @@ -1568,1792 +1472,158 @@ foreach i = 0...31 in DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef]>; -// Texture Fetch -// texmode_independent -def int_nvvm_tex_1d_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_tex_1d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_1d_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_tex_1d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_1d_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_tex_1d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; - -def int_nvvm_tex_1d_array_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_1d_array_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_array_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_1d_array_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_array_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_1d_array_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_array_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_1d_array_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_array_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_1d_array_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; -def int_nvvm_tex_1d_array_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_1d_array_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_2d_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_2d_array_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], []>; -def int_nvvm_tex_2d_array_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_array_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_array_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_array_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], []>; -def int_nvvm_tex_2d_array_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_array_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_array_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_array_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], []>; -def int_nvvm_tex_2d_array_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_2d_array_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_2d_array_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; - -def int_nvvm_tex_3d_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_nvvm_tex_3d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_3d_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_3d_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_3d_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_nvvm_tex_3d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_3d_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_3d_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_3d_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_nvvm_tex_3d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_3d_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_3d_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_cube_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_cube_array_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_array_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_array_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_array_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_array_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_cube_array_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tld4_r_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_g_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_b_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_a_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_r_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_g_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_b_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_a_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_r_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_g_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_b_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_a_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -// texmode_unified -def int_nvvm_tex_unified_1d_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_1d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_1d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_1d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; - -def int_nvvm_tex_unified_1d_array_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_1d_array_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_1d_array_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_1d_array_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_1d_array_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_unified_2d_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_tex_unified_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_unified_2d_array_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], []>; -def int_nvvm_tex_unified_2d_array_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], []>; -def int_nvvm_tex_unified_2d_array_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], []>; -def int_nvvm_tex_unified_2d_array_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_2d_array_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; - -def int_nvvm_tex_unified_3d_v4f32_s32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_nvvm_tex_unified_3d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_v4s32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_nvvm_tex_unified_3d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_v4u32_s32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_nvvm_tex_unified_3d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_3d_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_unified_cube_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_unified_cube_array_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_array_level_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_array_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_array_level_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_array_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_array_level_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_unified_cube_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tex_unified_cube_array_grad_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_array_grad_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tex_unified_cube_array_grad_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, - llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, - llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - -def int_nvvm_tld4_unified_r_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_g_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_b_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_a_2d_v4f32_f32 - : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_r_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_g_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_b_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_a_2d_v4s32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_r_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_g_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_b_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; -def int_nvvm_tld4_unified_a_2d_v4u32_f32 - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; + +foreach is_unified = [true, false] in { + defvar mode = !if(is_unified, "_unified", ""); + defvar addr_args = !if(is_unified, [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]); + + // Texture Fetch + foreach vec = [V4F32, V4S32, V4U32] in { + foreach is_array = [true, false] in { + defvar array = !if(is_array, "_array", ""); + defvar array_args = !if(is_array, [llvm_i32_ty], []); + + def int_nvvm_tex # mode # _1d # array # _ # vec.Name # _s32 + : Intrinsic; + def int_nvvm_tex # mode # _1d # array # _ # vec.Name # _f32 + : Intrinsic; + def int_nvvm_tex # mode # _1d # array # _level_ # vec.Name # _f32 + : Intrinsic; + def int_nvvm_tex # mode # _1d # array # _grad_ # vec.Name # _f32 + : Intrinsic; + + def int_nvvm_tex # mode # _2d # array # _ # vec.Name # _s32 + : Intrinsic; + def int_nvvm_tex # mode # _2d # array # _ # vec.Name # _f32 + : Intrinsic; + def int_nvvm_tex # mode # _2d # array # _level_ # vec.Name # _f32 + : Intrinsic; + def int_nvvm_tex # mode # _2d # array # _grad_ # vec.Name # _f32 + : Intrinsic; + + if !not(is_array) then { + def int_nvvm_tex # mode # _3d_ # vec.Name # _s32 + : Intrinsic; + def int_nvvm_tex # mode # _3d_ # vec.Name # _f32 + : Intrinsic; + def int_nvvm_tex # mode # _3d_level_ # vec.Name # _f32 + : Intrinsic; + def int_nvvm_tex # mode # _3d_grad_ # vec.Name # _f32 + : Intrinsic; + } + + def int_nvvm_tex # mode # _cube # array # _ # vec.Name # _f32 + : Intrinsic; + def int_nvvm_tex # mode # _cube # array # _level_ # vec.Name # _f32 + : Intrinsic; + + if is_unified then + def int_nvvm_tex # mode # _cube # array # _grad_ # vec.Name # _f32 + : Intrinsic; + } // is_array + + foreach comp = ["r", "g", "b", "a"] in { + def int_nvvm_tld4 # mode # _ # comp # _2d_ # vec.Name # _f32 + : Intrinsic; + } // comp + } // vec +} // is_unified + //=== Surface Load -// .clamp variants -def int_nvvm_suld_1d_i8_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i16_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i32_clamp - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i64_clamp - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i64_clamp - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_1d_array_i8_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i16_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i32_clamp - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i64_clamp - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i64_clamp - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_2d_i8_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i16_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i32_clamp - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i64_clamp - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i64_clamp - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_2d_array_i8_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i16_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i32_clamp - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i64_clamp - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i64_clamp - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_3d_i8_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i16_clamp - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i32_clamp - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i64_clamp - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i64_clamp - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i8_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i16_clamp - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i32_clamp - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - -// .trap variants -def int_nvvm_suld_1d_i8_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i16_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i32_trap - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i64_trap - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i64_trap - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_1d_array_i8_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i16_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i32_trap - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i64_trap - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i64_trap - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_2d_i8_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i16_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i32_trap - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i64_trap - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i64_trap - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_2d_array_i8_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i16_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i32_trap - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i64_trap - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i64_trap - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_3d_i8_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i16_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i32_trap - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i64_trap - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i64_trap - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - -// .zero variants -def int_nvvm_suld_1d_i8_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i16_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i32_zero - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_i64_zero - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v2i64_zero - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_v4i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_1d_array_i8_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i16_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i32_zero - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_i64_zero - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v2i64_zero - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_1d_array_v4i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_2d_i8_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i16_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i32_zero - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_i64_zero - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v2i64_zero - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_v4i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_2d_array_i8_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i16_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i32_zero - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_i64_zero - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v2i64_zero - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_2d_array_v4i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - -def int_nvvm_suld_3d_i8_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i16_zero - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i32_zero - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_i64_zero - : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v2i64_zero - : Intrinsic<[llvm_i64_ty, llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i8_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i16_zero - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_suld_3d_v4i32_zero - : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +foreach clamp = ["clamp", "trap", "zero"] in { + foreach vec = [TV_I8, TV_I16, TV_I32, TV_I64, + TV_V2I8, TV_V2I16, TV_V2I32, TV_V2I64, + TV_V4I8, TV_V4I16, TV_V4I32] in { + + def int_nvvm_suld_1d_ # vec.Name # _ # clamp + : Intrinsic; + + def int_nvvm_suld_1d_array_ # vec.Name # _ # clamp + : Intrinsic; + + def int_nvvm_suld_2d_ # vec.Name # _ # clamp + : Intrinsic; + + def int_nvvm_suld_2d_array_ # vec.Name # _ # clamp + : Intrinsic; + + def int_nvvm_suld_3d_ # vec.Name # _ # clamp + : Intrinsic; + } // vec +} // clamp //===- Texture Query ------------------------------------------------------===// foreach query = ["channel_order", "channel_data_type", "width", "height", - "depth", "array_size", "num_samples", "num_mipmap_levels"] in { + "depth", "array_size", "num_samples", "num_mipmap_levels"] in def int_nvvm_txq_ # query : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; -} //===- Surface Query ------------------------------------------------------===// -foreach query = ["channel_order", "channel_data_type", "width", "height", - "depth", "array_size"] in { +foreach query = ["channel_order", "channel_data_type", "width", "height", + "depth", "array_size"] in def int_nvvm_suq_ # query : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; -} //===- Handle Query -------------------------------------------------------===// -foreach type = ["sampler", "surface", "texture"] in { +foreach type = ["sampler", "surface", "texture"] in def int_nvvm_istypep_ # type : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem]>; -} //===- Surface Stores -----------------------------------------------------===// +multiclass SurfaceStoreIntrinsics { + def _1d_ # vec.Name # _ # clamp : NVVMBuiltin, + Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty], vec.Types)>; + + def _1d_array_ # vec.Name # _ # clamp : NVVMBuiltin, + Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; + + def _2d_ # vec.Name # _ # clamp : NVVMBuiltin, + Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; + + def _2d_array_ # vec.Name # _ # clamp : NVVMBuiltin, + Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; + + def _3d_ # vec.Name # _ # clamp : NVVMBuiltin, + Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; +} + // Unformatted -// .clamp variant -def int_nvvm_sust_b_1d_i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_v2i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v2i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v2i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_v2i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_v4i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v4i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v4i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_array_v2i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v2i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v2i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_v2i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_array_v4i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v4i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v4i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_v2i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v2i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v2i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_v2i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_v4i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v4i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v4i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_array_v2i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v2i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v2i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_v2i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_array_v4i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v4i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v4i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_3d_v2i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v2i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v2i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_v2i64_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_3d_v4i8_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v4i16_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v4i32_clamp : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -// .trap variant -def int_nvvm_sust_b_1d_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_v2i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_array_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_v2i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_array_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_v2i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_array_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_v2i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_array_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_3d_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_v2i64_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_3d_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -// .zero variant -def int_nvvm_sust_b_1d_i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_v2i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v2i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v2i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_v2i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_v4i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v4i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_v4i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_array_v2i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v2i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v2i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_1d_array_v2i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_1d_array_v4i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v4i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_1d_array_v4i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_v2i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v2i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v2i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_v2i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_v4i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v4i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_v4i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_array_v2i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v2i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v2i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_2d_array_v2i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_2d_array_v4i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v4i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_2d_array_v4i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_3d_v2i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v2i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v2i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_b_3d_v2i64_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i64_ty, llvm_i64_ty], []>; -def int_nvvm_sust_b_3d_v4i8_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v4i16_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_b_3d_v4i32_zero : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +foreach clamp = ["clamp", "trap", "zero"] in + foreach vec = [TV_I8, TV_I16, TV_I32, TV_I64, + TV_V2I8, TV_V2I16, TV_V2I32, TV_V2I64, + TV_V4I8, TV_V4I16, TV_V4I32] in + defm int_nvvm_sust_b : SurfaceStoreIntrinsics; // Formatted - -def int_nvvm_sust_p_1d_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_1d_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_1d_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_1d_array_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_array_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_array_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_1d_array_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_array_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_array_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_1d_array_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_array_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_1d_array_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_2d_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_2d_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_2d_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_2d_array_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_array_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_array_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_2d_array_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_array_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_array_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_2d_array_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_array_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_2d_array_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_3d_i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_3d_i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_3d_i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_3d_v2i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_3d_v2i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_3d_v2i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], []>; -def int_nvvm_sust_p_3d_v4i8_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_3d_v4i16_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; -def int_nvvm_sust_p_3d_v4i32_trap : NVVMBuiltin, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +foreach vec = [TV_I8, TV_I16, TV_I32, + TV_V2I8, TV_V2I16, TV_V2I32, + TV_V4I8, TV_V4I16, TV_V4I32] in + defm int_nvvm_sust_p : SurfaceStoreIntrinsics<"trap", vec>; // Accessing special registers. @@ -3475,19 +1745,16 @@ def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32; // SHUFFLE // // Generate intrinsics for all variants of shfl instruction. -foreach sync = [false, true] in { - foreach mode = ["up", "down", "bfly", "idx"] in { - foreach type = ["i32", "f32"] in { - foreach return_pred = [false, true] in { - defvar i = SHFL_INFO; - if i.withGccBuiltin then { - def i.Name : NVVMBuiltin, - Intrinsic; - } else { - def i.Name : - Intrinsic; +let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { + foreach sync = [false, true] in { + foreach mode = ["up", "down", "bfly", "idx"] in { + foreach type = ["i32", "f32"] in { + foreach return_pred = [false, true] in { + defvar i = SHFL_INFO; + if i.withGccBuiltin then + def i.Name : NVVMBuiltin, Intrinsic; + else + def i.Name : Intrinsic; } } } @@ -3498,43 +1765,21 @@ foreach sync = [false, true] in { // VOTE // -// vote.all pred -def int_nvvm_vote_all : NVVMBuiltin, - Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// vote.any pred -def int_nvvm_vote_any : NVVMBuiltin, - Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// vote.uni pred -def int_nvvm_vote_uni : NVVMBuiltin, - Intrinsic<[llvm_i1_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// vote.ballot pred -def int_nvvm_vote_ballot : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; - +let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { + def int_nvvm_vote_all : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>; + def int_nvvm_vote_any : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>; + def int_nvvm_vote_uni : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>; + def int_nvvm_vote_ballot : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i1_ty]>; +} // // VOTE.SYNC // - -// vote.sync.all mask, pred -def int_nvvm_vote_all_sync : NVVMBuiltin, - Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// vote.sync.any mask, pred -def int_nvvm_vote_any_sync : NVVMBuiltin, - Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// vote.sync.uni mask, pred -def int_nvvm_vote_uni_sync : NVVMBuiltin, - Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// vote.sync.ballot mask, pred -def int_nvvm_vote_ballot_sync : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { + def int_nvvm_vote_all_sync : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty]>; + def int_nvvm_vote_any_sync : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty]>; + def int_nvvm_vote_uni_sync : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty]>; + def int_nvvm_vote_ballot_sync : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty]>; +} // // ACTIVEMASK @@ -3546,28 +1791,25 @@ def int_nvvm_activemask : NVVMBuiltin, // // MATCH.SYNC // -// match.any.sync.b32 mask, value -def int_nvvm_match_any_sync_i32 : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// match.any.sync.b64 mask, value -def int_nvvm_match_any_sync_i64 : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; - -// match.all instruction have two variants -- one returns a single value, another -// returns a pair {value, predicate}. We currently only implement the latter as -// that's the variant exposed by CUDA API. - -// match.all.sync.b32p mask, value -def int_nvvm_match_all_sync_i32p : - Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; -// match.all.sync.b64p mask, value -def int_nvvm_match_all_sync_i64p : - Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; - +let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { + // match.any.sync.b32 mask, value + def int_nvvm_match_any_sync_i32 : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; + // match.any.sync.b64 mask, value + def int_nvvm_match_any_sync_i64 : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty]>; + + // match.all instruction have two variants -- one returns a single value, another + // returns a pair {value, predicate}. We currently only implement the latter as + // that's the variant exposed by CUDA API. + + // match.all.sync.b32p mask, value + def int_nvvm_match_all_sync_i32p : + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty]>; + // match.all.sync.b64p mask, value + def int_nvvm_match_all_sync_i64p : + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty]>; +} // // ELECT.SYNC // @@ -3581,21 +1823,17 @@ def int_nvvm_elect_sync : // // redux.sync.op.u32 dst, src, membermask; -foreach op = ["umin", "umax", "add", "min", "max", "and", "xor", "or"] in { - def int_nvvm_redux_sync_ # op : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; -} - -// redux.sync.op.{abs}.{NaN}.f32 dst, src, membermask; -foreach binOp = ["min", "max"] in { - foreach abs = ["", "_abs"] in { - foreach NaN = ["", "_NaN"] in { - def int_nvvm_redux_sync_f # binOp # abs # NaN : NVVMBuiltin, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], - [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; - } - } +let IntrProperties = [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback] in { + foreach op = ["umin", "umax", "add", "min", "max", "and", "xor", "or"] in + def int_nvvm_redux_sync_ # op : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; + + // redux.sync.op.{abs}.{NaN}.f32 dst, src, membermask; + foreach binOp = ["min", "max"] in + foreach abs = ["", "_abs"] in + foreach NaN = ["", "_NaN"] in + def int_nvvm_redux_sync_f # binOp # abs # NaN : NVVMBuiltin, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty]>; } // @@ -3647,13 +1885,10 @@ foreach layout = ["row", "col"] in { } // WMMA.MMA -class NVVM_WMMA_MMA +class NVVM_MMA : Intrinsic.llvm>; + [IntrNoMem, IntrNoCallback]>; foreach layout_a = ["row", "col"] in { foreach layout_b = ["row", "col"] in { @@ -3664,8 +1899,7 @@ foreach layout_a = ["row", "col"] in { if NVVM_WMMA_SUPPORTED.ret then { def WMMA_NAME.record - : NVVM_WMMA_MMA; + : NVVM_MMA; } } // b1op } // op @@ -3674,14 +1908,6 @@ foreach layout_a = ["row", "col"] in { } // layout_b } // layout_a -// MMA -class NVVM_MMA - : Intrinsic.llvm>; - foreach layout_a = ["row", "col"] in { foreach layout_b = ["row", "col"] in { foreach satf = [0, 1] in { @@ -3689,7 +1915,7 @@ foreach layout_a = ["row", "col"] in { foreach b1op = NVVM_MMA_B1OPS.ret in { if NVVM_MMA_SUPPORTED.ret then { def MMA_NAME.record - : NVVM_MMA; + : NVVM_MMA; } } // b1op } // op @@ -3713,18 +1939,22 @@ foreach transposed = [0, 1] in { } } -def int_nvvm_mapa - : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>]>; -def int_nvvm_mapa_shared_cluster - : DefaultAttrsIntrinsic<[llvm_shared_cluster_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>]>; -def int_nvvm_getctarank - : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>]>; -def int_nvvm_getctarank_shared_cluster - : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture>]>; +// MAPA +let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture>] in { + def int_nvvm_mapa + : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty]>; + def int_nvvm_mapa_shared_cluster + : DefaultAttrsIntrinsic<[llvm_shared_cluster_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty]>; +} + +// GETCTARANK +let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture>] in { + def int_nvvm_getctarank + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; + def int_nvvm_getctarank_shared_cluster + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty]>; +} + def int_nvvm_is_explicit_cluster : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef], @@ -3740,87 +1970,123 @@ foreach op = ["dec", "inc"] in def int_nvvm_exit : NVVMBuiltin, Intrinsic<[], [], [IntrConvergent, IntrInaccessibleMemOnly, IntrNoReturn]>; +class DefaultAttrsIntrinsicFlags ret_types, + list param_types, + list flags, + list intr_properties> + : DefaultAttrsIntrinsic< + ret_types, + !listconcat(param_types, flags), + !listconcat(intr_properties, + !foreach(i, !range(flags), + ImmArg>))>; + // Intrinsics for Tensor Copy using TMA // G2S -> From Global to Shared memory variants // S2G -> From Shared to Global memory variants -foreach dim = [1, 2, 3, 4, 5] in { +foreach dim = 1...5 in { + defvar tensor_dim_args = !listsplat(llvm_i32_ty, dim); + foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { - foreach g2s = [CP_ASYNC_BULK_TENSOR_G2S_INTR] in - def g2s.Name : DefaultAttrsIntrinsic<[], g2s.ArgsTy, g2s.IntrProp>; - foreach s2g = [CP_ASYNC_BULK_TENSOR_S2G_INTR] in - def s2g.Name : DefaultAttrsIntrinsic<[], s2g.ArgsTy, s2g.IntrProp>; - foreach prefetch = [CP_ASYNC_BULK_TENSOR_PREFETCH_INTR] in - def prefetch.Name : DefaultAttrsIntrinsic<[], prefetch.ArgsTy, prefetch.IntrProp>; + defvar is_im2col = !eq(mode, "im2col"); + defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0); + defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets); + + def int_nvvm_cp_async_bulk_tensor_g2s_ # mode # _ # dim # d : + DefaultAttrsIntrinsicFlags<[], + !listconcat([llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr + llvm_shared_ptr_ty, // mbarrier_smem_ptr + llvm_ptr_ty], // tensormap_ptr + tensor_dim_args, // actual tensor dims + im2col_offsets_args, // im2col offsets + [llvm_i16_ty, // cta_mask + llvm_i64_ty]), // cache_hint + [llvm_i1_ty, // Flag for cta_mask + llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, + WriteOnly>, ReadOnly>, + NoCapture>, NoCapture>, NoCapture>]>; + + def int_nvvm_cp_async_bulk_tensor_s2g_ # mode # _ # dim # d : + DefaultAttrsIntrinsicFlags<[], + !listconcat([llvm_shared_ptr_ty, // src_smem_ptr + llvm_ptr_ty], // tensormap_ptr + tensor_dim_args, // actual tensor dims + [llvm_i64_ty]), // cache_hint + [llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, + ReadOnly>, ReadOnly>, + NoCapture>, NoCapture>]>; + + def int_nvvm_cp_async_bulk_tensor_prefetch_ # mode # _ # dim # d : + DefaultAttrsIntrinsicFlags<[], + !listconcat([llvm_ptr_ty], // tensormap_ptr + tensor_dim_args, // actual tensor dims + im2col_offsets_args, // im2col offsets + [llvm_i64_ty]), // cache_hint + [llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, + ReadOnly>, NoCapture>]>; + + // Intrinsics for TMA Copy with reduction + foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in + def int_nvvm_cp_async_bulk_tensor_reduce_ # red_op # _ # mode # _ # dim # d : + DefaultAttrsIntrinsicFlags<[], + !listconcat([llvm_shared_ptr_ty, // src_smem_ptr + llvm_ptr_ty], // tensormap_ptr + tensor_dim_args, // actual tensor dims + [llvm_i64_ty]), // cache_hint + [llvm_i1_ty], // Flag for cache_hint + [IntrConvergent, ReadOnly>, ReadOnly>, + NoCapture>, NoCapture>]>; } } -// Intrinsics for TMA Copy with reduction -foreach dim = [1, 2, 3, 4, 5] in { - foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { - foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in { - foreach reduce = [CP_ASYNC_BULK_TENSOR_REDUCE_INTR] in - def reduce.Name : DefaultAttrsIntrinsic<[], reduce.ArgsTy, reduce.IntrProp>; - } +// Intrinsics for Prefetch and Prefetchu +let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture>] in { + foreach level = ["L1", "L2"] in { + def int_nvvm_prefetch_ # level : Intrinsic<[], [llvm_ptr_ty]>; + def int_nvvm_prefetch_global_ # level : Intrinsic<[], [llvm_global_ptr_ty]>; + def int_nvvm_prefetch_local_ # level : Intrinsic<[], [llvm_local_ptr_ty]>; } + + foreach eviction_priority = ["evict_normal", "evict_last"] in + def int_nvvm_prefetch_global_L2_ # eviction_priority : Intrinsic<[], [llvm_global_ptr_ty]>; + + def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty]>; } -// Intrinsics for Prefetch and Prefetchu -def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; -def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; -def int_nvvm_prefetch_global_L1 : Intrinsic<[], [llvm_global_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; -def int_nvvm_prefetch_global_L2 : Intrinsic<[], [llvm_global_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; -def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; -def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; - -def int_nvvm_prefetch_global_L2_evict_normal : Intrinsic<[], [llvm_global_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; -def int_nvvm_prefetch_global_L2_evict_last : Intrinsic<[], [llvm_global_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; -def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>]>; - -def int_nvvm_applypriority_global_L2_evict_normal - : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>, - ImmArg>]>; - -def int_nvvm_applypriority_L2_evict_normal - : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>, - ImmArg>]>; - -// Intrinsics for discard -def int_nvvm_discard_global_L2 : DefaultAttrsIntrinsic<[], - [llvm_global_ptr_ty, llvm_i64_ty], [NoCapture>, - ImmArg>, IntrHasSideEffects]>; - -def int_nvvm_discard_L2 : DefaultAttrsIntrinsic<[], - [llvm_ptr_ty, llvm_i64_ty], [NoCapture>, - ImmArg>, IntrHasSideEffects]>; +// applypriority +let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture>, + ImmArg>] in { + def int_nvvm_applypriority_global_L2_evict_normal + : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty]>; + + def int_nvvm_applypriority_L2_evict_normal + : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty]>; +} + +// discard +let IntrProperties = [NoCapture>, ImmArg>, IntrHasSideEffects] in { + def int_nvvm_discard_global_L2 : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty]>; + def int_nvvm_discard_L2 : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty]>; +} // Intrinsics for Bulk Copy using TMA (non-tensor) // From Global to Shared Cluster def int_nvvm_cp_async_bulk_global_to_shared_cluster - : DefaultAttrsIntrinsic<[], + : DefaultAttrsIntrinsicFlags<[], [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr llvm_shared_ptr_ty, // mbarrier_ptr llvm_global_ptr_ty, // src_gmem_ptr llvm_i32_ty, // copy_size llvm_i16_ty, // cta_mask - llvm_i64_ty, // cache_hint - llvm_i1_ty, // Flag for cta_mask + llvm_i64_ty], // cache_hint + [llvm_i1_ty, // Flag for cta_mask llvm_i1_ty], // Flag for cache_hint [IntrConvergent, IntrArgMemOnly, WriteOnly>, ReadOnly>, - NoCapture>, NoCapture>, - NoCapture>, ImmArg>, - ImmArg>]>; + NoCapture>, NoCapture>, NoCapture>]>; // From Shared CTA to Shared Cluster def int_nvvm_cp_async_bulk_shared_cta_to_cluster @@ -3836,27 +2102,25 @@ def int_nvvm_cp_async_bulk_shared_cta_to_cluster // From Shared CTA to Global memory def int_nvvm_cp_async_bulk_shared_cta_to_global - : DefaultAttrsIntrinsic<[], + : DefaultAttrsIntrinsicFlags<[], [llvm_global_ptr_ty, // dst_gmem_ptr llvm_shared_ptr_ty, // src_smem_ptr llvm_i32_ty, // copy_size - llvm_i64_ty, // cache_hint - llvm_i1_ty], // Flag for cache_hint + llvm_i64_ty], // cache_hint + [llvm_i1_ty], // Flag for cache_hint [IntrConvergent, IntrArgMemOnly, WriteOnly>, ReadOnly>, - NoCapture>, NoCapture>, - ImmArg>]>; + NoCapture>, NoCapture>]>; // Intrinsics for Bulk Copy Prefetch L2 def int_nvvm_cp_async_bulk_prefetch_L2 - : DefaultAttrsIntrinsic<[], + : DefaultAttrsIntrinsicFlags<[], [llvm_global_ptr_ty, // src_gmem_ptr llvm_i32_ty, // copy_size - llvm_i64_ty, // cache_hint - llvm_i1_ty], // Flag for cache_hint + llvm_i64_ty], // cache_hint + [llvm_i1_ty], // Flag for cache_hint [IntrConvergent, IntrArgMemOnly, - NoCapture>, ReadOnly>, - ImmArg>]>; + NoCapture>, ReadOnly>]>; def int_nvvm_griddepcontrol_launch_dependents : Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_nvvm_griddepcontrol_wait : Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; @@ -3955,8 +2219,7 @@ class NVVM_TCGEN05_LD : !listconcat([IntrConvergent, IntrArgMemOnly, NoCapture>], !if(!eq(Shape, "16x32bx2"), [ImmArg>, ImmArg>], - [ImmArg>])), - NVVM_TCGEN05_LDST_NAME<"ld", Shape, Num>.intr>; + [ImmArg>]))>; // Tcgen05 st intrinsics class NVVM_TCGEN05_ST : @@ -3968,32 +2231,28 @@ class NVVM_TCGEN05_ST : !listconcat([IntrConvergent, IntrArgMemOnly, NoCapture>], !if(!eq(Shape, "16x32bx2"), [ImmArg>, ImmArg>], - [ImmArg>])), - NVVM_TCGEN05_LDST_NAME<"st", Shape, Num>.intr>; + [ImmArg>]))>; foreach shape = ["16x64b", "16x128b", "16x256b", "32x32b", "16x32bx2"] in { - foreach num = !range(0, 8) in { + foreach num = 0...8 in { if NVVM_TCGEN05_LDST_ACCESS_SIZE.valid then { - def NVVM_TCGEN05_LDST_NAME<"ld", shape, num>.record : + def int_nvvm_tcgen05_ld_ # shape # _x # !shl(1, num) : NVVM_TCGEN05_LD; - def NVVM_TCGEN05_LDST_NAME<"st", shape, num>.record : + def int_nvvm_tcgen05_st_ # shape # _x # !shl(1, num) : NVVM_TCGEN05_ST; - } + } } } // // Bulk store intrinsics // +let IntrProperties = [IntrArgMemOnly, IntrWriteMem, WriteOnly>, + NoCapture>, ImmArg>] in { + def int_nvvm_st_bulk : + DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty]>; -def int_nvvm_st_bulk : DefaultAttrsIntrinsic<[], - [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], - [IntrArgMemOnly, IntrWriteMem, - WriteOnly>, NoCapture>, ImmArg>]>; - -def int_nvvm_st_bulk_shared_cta : DefaultAttrsIntrinsic<[], - [llvm_shared_ptr_ty, llvm_i64_ty, llvm_i64_ty], - [IntrArgMemOnly, IntrWriteMem, - WriteOnly>, NoCapture>, ImmArg>]>; - + def int_nvvm_st_bulk_shared_cta : + DefaultAttrsIntrinsic<[], [llvm_shared_ptr_ty, llvm_i64_ty, llvm_i64_ty]>; +} } // let TargetPrefix = "nvvm"