@@ -2718,80 +2718,64 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
27182718// Scalar
27192719
// LDU_G: scalar ldu.global instruction variants; every def carries
// Requires<[hasLDU]>.
// In this hunk the `avar` def (single imemAny:$src operand) is replaced by
// `asi`, which adds an Offseti32imm:$offset operand printed as [$src$offset].
// The asm string is now assembled inside the multiclass with the `#` paste
// operator, so TyStr is only the type suffix (e.g. "u8") rather than the
// suffix plus the operand text.
27202720multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2721- def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2722- !strconcat( "ldu.global.", TyStr) ,
2721+ def asi : NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset ),
2722+ "ldu.global." # TyStr # " \t$result, [$src$offset];" ,
27232723 []>, Requires<[hasLDU]>;
27242724 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2725- !strconcat( "ldu.global.", TyStr) ,
2725+ "ldu.global." # TyStr # " \t$result, [$src];" ,
27262726 []>, Requires<[hasLDU]>;
27272727 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2728- !strconcat( "ldu.global.", TyStr) ,
2728+ "ldu.global." # TyStr # " \t$result, [$src];" ,
27292729 []>, Requires<[hasLDU]>;
27302730}
27312731
// Scalar ldu instantiations of LDU_G. The removed lines passed the type suffix
// together with the operand text in one string; the added lines pass only the
// type suffix. Both the i8 and the i16 variants use Int16Regs.
2732- defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src]; ", Int16Regs>;
2733- defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src]; ", Int16Regs>;
2734- defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src]; ", Int32Regs>;
2735- defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src]; ", Int64Regs>;
2736- defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src]; ", Float32Regs>;
2737- defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src]; ", Float64Regs>;
2732+ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>;
2733+ defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>;
2734+ defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>;
2735+ defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>;
2736+ defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>;
2737+ defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>;
27382738
27392739// vector
27402740
27412741// Elementized vector ldu
// VLDU_G_ELE_V2: two-result ldu.global.v2 forms.
//   _ari32 - MEMri:$src address operand
//   _ari64 - MEMri64:$src address operand
//   _asi   - imemAny:$src plus Offseti32imm:$offset (replaces the removed _avar)
// The "v2." prefix and the {{$dst1, $dst2}} operand text are now emitted here,
// so TyStr is only the element type. No Requires<> predicate is attached to
// these defs.
27422742multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
27432743 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
27442744 (ins MEMri:$src),
2745- !strconcat( "ldu.global.", TyStr) , []>;
2745+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
27462746 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
27472747 (ins MEMri64:$src),
2748- !strconcat( "ldu.global.", TyStr) , []>;
2749- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2750- (ins imemAny:$src),
2751- !strconcat( "ldu.global.", TyStr) , []>;
2748+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2749+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2750+ (ins imemAny:$src, Offseti32imm:$offset ),
2751+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];" , []>;
27522752}
27532753
// VLDU_G_ELE_V4: four-result ldu.global.v4 forms (_ari32, _ari64, and _asi
// replacing the removed _avar). As in the other multiclasses of this hunk, the
// "v4." prefix and the {{$dst1, $dst2, $dst3, $dst4}} operand text are now
// built here and TyStr is only the element type.
// Note: the diff interleaves the removed multi-line vector defm instantiations
// (old form, full operand text in the type string) between the old and the
// new tail of this multiclass.
27542754multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
27552755 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
27562756 regclass:$dst4), (ins MEMri:$src),
2757- !strconcat( "ldu.global.", TyStr) , []>;
2757+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
27582758 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
27592759 regclass:$dst4), (ins MEMri64:$src),
2760- !strconcat("ldu.global.", TyStr), []>;
2761- def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2762- regclass:$dst4), (ins imemAny:$src),
2763- !strconcat("ldu.global.", TyStr), []>;
2764- }
2765-
2766- defm INT_PTX_LDU_G_v2i8_ELE
2767- : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2768- defm INT_PTX_LDU_G_v2i16_ELE
2769- : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2770- defm INT_PTX_LDU_G_v2i32_ELE
2771- : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2772- defm INT_PTX_LDU_G_v2f32_ELE
2773- : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2774- defm INT_PTX_LDU_G_v2i64_ELE
2775- : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2776- defm INT_PTX_LDU_G_v2f64_ELE
2777- : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2778- defm INT_PTX_LDU_G_v4i8_ELE
2779- : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2780- defm INT_PTX_LDU_G_v4i16_ELE
2781- : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2782- Int16Regs>;
2783- defm INT_PTX_LDU_G_v4i32_ELE
2784- : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2785- Int32Regs>;
2786- defm INT_PTX_LDU_G_v4f16_ELE
2787- : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2788- Int16Regs>;
2789- defm INT_PTX_LDU_G_v4f16x2_ELE
2790- : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2791- Int32Regs>;
2792- defm INT_PTX_LDU_G_v4f32_ELE
2793- : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2794- Float32Regs>;
2760+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2761+ def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2762+ regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset),
2763+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>;
2764+ }
2765+
// Vector ldu instantiations (added lines): each defm now passes only the
// element type string. The v4f16 entry uses "b16" with Int16Regs and the
// v4f16x2 entry uses "b32" with Int32Regs.
2766+ defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>;
2767+ defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>;
2768+ defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>;
2769+ defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>;
2770+ defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>;
2771+ defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>;
2772+
2773+ defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>;
2774+ defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>;
2775+ defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>;
2776+ defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2777+ defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2778+ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
27952779
27962780
27972781//-----------------------------------
@@ -2803,84 +2787,63 @@ defm INT_PTX_LDU_G_v4f32_ELE
28032787// during the lifetime of the kernel.
28042788
// LDG_G: scalar ld.global.nc instruction variants; every def carries
// Requires<[hasLDG]>.
// In this hunk the `avar` def (single imemAny:$src operand) is replaced by
// `asi`, which adds an Offseti32imm:$offset operand printed as [$src$offset].
// The asm string is now assembled inside the multiclass with the `#` paste
// operator, so TyStr is only the type suffix.
28052789multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2806- def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2807- !strconcat( "ld.global.nc.", TyStr) ,
2790+ def asi : NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset ),
2791+ "ld.global.nc." # TyStr # " \t$result, [$src$offset];" ,
28082792 []>, Requires<[hasLDG]>;
28092793 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2810- !strconcat( "ld.global.nc.", TyStr) ,
2794+ "ld.global.nc." # TyStr # " \t$result, [$src];" ,
28112795 []>, Requires<[hasLDG]>;
28122796 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2813- !strconcat( "ld.global.nc.", TyStr) ,
2797+ "ld.global.nc." # TyStr # " \t$result, [$src];" ,
28142798 []>, Requires<[hasLDG]>;
28152799}
28162800
// Scalar ldg instantiations of LDG_G. The removed two-line forms passed the
// type suffix together with the operand text; the added one-line forms pass
// only the type suffix. Both the i8 and the i16 variants use Int16Regs.
2817- defm INT_PTX_LDG_GLOBAL_i8
2818- : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2819- defm INT_PTX_LDG_GLOBAL_i16
2820- : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2821- defm INT_PTX_LDG_GLOBAL_i32
2822- : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2823- defm INT_PTX_LDG_GLOBAL_i64
2824- : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2825- defm INT_PTX_LDG_GLOBAL_f32
2826- : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2827- defm INT_PTX_LDG_GLOBAL_f64
2828- : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2801+ defm INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>;
2802+ defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>;
2803+ defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>;
2804+ defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>;
2805+ defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>;
2806+ defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>;
28292807
28302808// vector
28312809
28322810// Elementized vector ldg
// VLDG_G_ELE_V2: two-result ld.global.nc.v2 forms.
//   _ari32 - MEMri:$src address operand
//   _ari64 - MEMri64:$src address operand
//   _asi   - imemAny:$src plus Offseti32imm:$offset (replaces the removed _avar)
// The "v2." prefix and the {{$dst1, $dst2}} operand text are now emitted here,
// so TyStr is only the element type. No Requires<> predicate is attached to
// these defs.
28332811multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
28342812 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
28352813 (ins MEMri:$src),
2836- !strconcat( "ld.global.nc.", TyStr) , []>;
2814+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
28372815 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
28382816 (ins MEMri64:$src),
2839- !strconcat( "ld.global.nc.", TyStr) , []>;
2840- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2841- (ins imemAny:$src),
2842- !strconcat( "ld.global.nc.", TyStr) , []>;
2817+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2818+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2819+ (ins imemAny:$src, Offseti32imm:$offset ),
2820+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];" , []>;
28432821}
28442822
// VLDG_G_ELE_V4: four-result ld.global.nc.v4 forms.
// This hunk removes the _areg32 / _areg64 defs (plain Int32Regs / Int64Regs
// $src operand) without adding a replacement here, and replaces _avar with
// _asi, which takes imemAny:$src plus Offseti32imm:$offset.
// The "v4." prefix and the {{$dst1, $dst2, $dst3, $dst4}} operand text are now
// built here, so TyStr is only the element type.
28452823multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2846- def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2847- regclass:$dst4), (ins Int32Regs:$src),
2848- !strconcat("ld.global.nc.", TyStr), []>;
2849- def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2850- regclass:$dst4), (ins Int64Regs:$src),
2851- !strconcat("ld.global.nc.", TyStr), []>;
28522824 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
28532825 regclass:$dst4), (ins MEMri:$src),
2854- !strconcat( "ld.global.nc.", TyStr) , []>;
2826+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
28552827 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
28562828 regclass:$dst4), (ins MEMri64:$src),
2857- !strconcat( "ld.global.nc.", TyStr) , []>;
2858- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2859- regclass:$dst4), (ins imemAny:$src),
2860- !strconcat( "ld.global.nc.", TyStr) , []>;
2829+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
2830+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2831+ regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset ),
2832+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];" , []>;
28612833}
28622834
28632835// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. The removed two-line forms carried the vector
// prefix and operand text inside the type string; the added one-line forms
// pass only the element type. The set of instantiated names is the same
// before and after (v2: i8, i16, i32, f32, i64, f64; v4: i8, i16, i32, f32).
2864- defm INT_PTX_LDG_G_v2i8_ELE
2865- : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2866- defm INT_PTX_LDG_G_v2i16_ELE
2867- : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2868- defm INT_PTX_LDG_G_v2i32_ELE
2869- : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2870- defm INT_PTX_LDG_G_v2f32_ELE
2871- : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2872- defm INT_PTX_LDG_G_v2i64_ELE
2873- : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2874- defm INT_PTX_LDG_G_v2f64_ELE
2875- : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2876- defm INT_PTX_LDG_G_v4i8_ELE
2877- : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2878- defm INT_PTX_LDG_G_v4i16_ELE
2879- : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2880- defm INT_PTX_LDG_G_v4i32_ELE
2881- : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2882- defm INT_PTX_LDG_G_v4f32_ELE
2883- : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
2836+ defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>;
2837+ defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>;
2838+ defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>;
2839+ defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>;
2840+ defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>;
2841+ defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>;
2842+
2843+ defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>;
2844+ defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>;
2845+ defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
2846+ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
28842847
28852848
28862849multiclass NG_TO_G<string Str> {
0 commit comments