@@ -2693,80 +2693,64 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
26932693// Scalar
26942694
// LDU_G: scalar `ldu.global.<TyStr>` instruction defs; each def carries
// Requires<[hasLDU]>.
// This hunk replaces the `avar` form with `asi`, which takes an additional
// Offseti32imm:$offset operand printed immediately after $src inside the
// brackets, and moves the operand text out of the TyStr argument into the
// multiclass, so TyStr now carries only the type suffix.
26952695multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
2696- def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2697- !strconcat( "ldu.global.", TyStr) ,
2696+ def asi : NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset ),
2697+ "ldu.global." # TyStr # " \t$result, [$src$offset];" ,
26982698 []>, Requires<[hasLDU]>;
26992699 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2700- !strconcat( "ldu.global.", TyStr) ,
2700+ "ldu.global." # TyStr # " \t$result, [$src];" ,
27012701 []>, Requires<[hasLDU]>;
27022702 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2703- !strconcat( "ldu.global.", TyStr) ,
2703+ "ldu.global." # TyStr # " \t$result, [$src];" ,
27042704 []>, Requires<[hasLDU]>;
27052705}
27062706
// Scalar LDU_G instantiations. The removed lines passed the type suffix plus
// the full operand text as TyStr; the added lines pass only the type suffix
// (u8, u16, u32, u64, f32, f64). Both the i8 and i16 variants use Int16Regs.
2707- defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src]; ", Int16Regs>;
2708- defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src]; ", Int16Regs>;
2709- defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src]; ", Int32Regs>;
2710- defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src]; ", Int64Regs>;
2711- defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src]; ", Float32Regs>;
2712- defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src]; ", Float64Regs>;
2707+ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8", Int16Regs>;
2708+ defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16", Int16Regs>;
2709+ defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32", Int32Regs>;
2710+ defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64", Int64Regs>;
2711+ defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32", Float32Regs>;
2712+ defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64", Float64Regs>;
27132713
27142714// vector
27152715
27162716// Elementized vector ldu
// VLDU_G_ELE_V2: two-result `ldu.global.v2.<TyStr>` instruction defs.
// After this hunk the `v2.` prefix and the `{{$dst1, $dst2}}, [$src]` operand
// text are written here instead of being supplied through TyStr. The `_avar`
// form is replaced by `_asi`, which adds an Offseti32imm:$offset operand
// printed right after $src. No Requires<...> predicate is attached to these
// defs in the visible lines.
27172717multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
27182718 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
27192719 (ins MEMri:$src),
2720- !strconcat( "ldu.global.", TyStr) , []>;
2720+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
27212721 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
27222722 (ins MEMri64:$src),
2723- !strconcat( "ldu.global.", TyStr) , []>;
2724- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2725- (ins imemAny:$src),
2726- !strconcat( "ldu.global.", TyStr) , []>;
2723+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2724+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2725+ (ins imemAny:$src, Offseti32imm:$offset ),
2726+ "ldu.global.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];" , []>;
27272727}
27282728
// VLDU_G_ELE_V4: four-result `ldu.global.v4.<TyStr>` instruction defs.
// After this hunk the `v4.` prefix and the
// `{{$dst1, $dst2, $dst3, $dst4}}, [$src]` operand text are written here
// instead of being supplied through TyStr. The `_avar` form is replaced by
// `_asi`, which adds an Offseti32imm:$offset operand printed right after $src.
// The removed lines in this span also include the old vector defm
// instantiations, which passed the vector prefix and operand text via TyStr.
27292729multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
27302730 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
27312731 regclass:$dst4), (ins MEMri:$src),
2732- !strconcat( "ldu.global.", TyStr) , []>;
2732+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
27332733 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
27342734 regclass:$dst4), (ins MEMri64:$src),
2735- !strconcat("ldu.global.", TyStr), []>;
2736- def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2737- regclass:$dst4), (ins imemAny:$src),
2738- !strconcat("ldu.global.", TyStr), []>;
2739- }
2740-
2741- defm INT_PTX_LDU_G_v2i8_ELE
2742- : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2743- defm INT_PTX_LDU_G_v2i16_ELE
2744- : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2745- defm INT_PTX_LDU_G_v2i32_ELE
2746- : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2747- defm INT_PTX_LDU_G_v2f32_ELE
2748- : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2749- defm INT_PTX_LDU_G_v2i64_ELE
2750- : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2751- defm INT_PTX_LDU_G_v2f64_ELE
2752- : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2753- defm INT_PTX_LDU_G_v4i8_ELE
2754- : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2755- defm INT_PTX_LDU_G_v4i16_ELE
2756- : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2757- Int16Regs>;
2758- defm INT_PTX_LDU_G_v4i32_ELE
2759- : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2760- Int32Regs>;
2761- defm INT_PTX_LDU_G_v4f16_ELE
2762- : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2763- Int16Regs>;
2764- defm INT_PTX_LDU_G_v4f16x2_ELE
2765- : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2766- Int32Regs>;
2767- defm INT_PTX_LDU_G_v4f32_ELE
2768- : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2769- Float32Regs>;
2735+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2736+ def _asi: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2737+ regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset),
2738+ "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];", []>;
2739+ }
2740+
// Vector LDU instantiations in their new form: each passes only the element
// type suffix, and the multiclass supplies the v2./v4. prefix and operand
// text. The v4f16 and v4f16x2 variants use the b16 and b32 suffixes with
// Int16Regs and Int32Regs respectively.
2741+ defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"u8", Int16Regs>;
2742+ defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"u16", Int16Regs>;
2743+ defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"u32", Int32Regs>;
2744+ defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"f32", Float32Regs>;
2745+ defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"u64", Int64Regs>;
2746+ defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"f64", Float64Regs>;
2747+
2748+ defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"u8", Int16Regs>;
2749+ defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"u16", Int16Regs>;
2750+ defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"u32", Int32Regs>;
2751+ defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2752+ defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2753+ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"f32", Float32Regs>;
27702754
27712755
27722756//-----------------------------------
@@ -2778,84 +2762,63 @@ defm INT_PTX_LDU_G_v4f32_ELE
27782762// during the lifetime of the kernel.
27792763
// LDG_G: scalar `ld.global.nc.<TyStr>` instruction defs; each def carries
// Requires<[hasLDG]>.
// This hunk replaces the `avar` form with `asi`, which takes an additional
// Offseti32imm:$offset operand printed immediately after $src inside the
// brackets, and moves the operand text out of the TyStr argument into the
// multiclass, so TyStr now carries only the type suffix.
27802764multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2781- def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2782- !strconcat( "ld.global.nc.", TyStr) ,
2765+ def asi : NVPTXInst<(outs regclass:$result), (ins imemAny:$src, Offseti32imm:$offset ),
2766+ "ld.global.nc." # TyStr # " \t$result, [$src$offset];" ,
27832767 []>, Requires<[hasLDG]>;
27842768 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2785- !strconcat( "ld.global.nc.", TyStr) ,
2769+ "ld.global.nc." # TyStr # " \t$result, [$src];" ,
27862770 []>, Requires<[hasLDG]>;
27872771 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2788- !strconcat( "ld.global.nc.", TyStr) ,
2772+ "ld.global.nc." # TyStr # " \t$result, [$src];" ,
27892773 []>, Requires<[hasLDG]>;
27902774}
27912775
// Scalar LDG_G instantiations. The removed two-line forms passed the type
// suffix plus the full operand text as TyStr; the added one-line forms pass
// only the type suffix (u8, u16, u32, u64, f32, f64). Both the i8 and i16
// variants use Int16Regs.
2792- defm INT_PTX_LDG_GLOBAL_i8
2793- : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2794- defm INT_PTX_LDG_GLOBAL_i16
2795- : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2796- defm INT_PTX_LDG_GLOBAL_i32
2797- : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2798- defm INT_PTX_LDG_GLOBAL_i64
2799- : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2800- defm INT_PTX_LDG_GLOBAL_f32
2801- : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2802- defm INT_PTX_LDG_GLOBAL_f64
2803- : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2776+ defm INT_PTX_LDG_GLOBAL_i8 : LDG_G<"u8", Int16Regs>;
2777+ defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16", Int16Regs>;
2778+ defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32", Int32Regs>;
2779+ defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64", Int64Regs>;
2780+ defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32", Float32Regs>;
2781+ defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64", Float64Regs>;
28042782
28052783// vector
28062784
28072785// Elementized vector ldg
// VLDG_G_ELE_V2: two-result `ld.global.nc.v2.<TyStr>` instruction defs.
// After this hunk the `v2.` prefix and the `{{$dst1, $dst2}}, [$src]` operand
// text are written here instead of being supplied through TyStr. The `_avar`
// form is replaced by `_asi`, which adds an Offseti32imm:$offset operand
// printed right after $src. No Requires<...> predicate is attached to these
// defs in the visible lines.
28082786multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
28092787 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
28102788 (ins MEMri:$src),
2811- !strconcat( "ld.global.nc.", TyStr) , []>;
2789+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
28122790 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
28132791 (ins MEMri64:$src),
2814- !strconcat( "ld.global.nc.", TyStr) , []>;
2815- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2816- (ins imemAny:$src),
2817- !strconcat( "ld.global.nc.", TyStr) , []>;
2792+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];" , []>;
2793+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2794+ (ins imemAny:$src, Offseti32imm:$offset ),
2795+ "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src$offset];" , []>;
28182796}
28192797
// VLDG_G_ELE_V4: four-result `ld.global.nc.v4.<TyStr>` instruction defs.
// This hunk removes the `_areg32`/`_areg64` forms (Int32Regs/Int64Regs $src),
// replaces `_avar` with `_asi` (which adds an Offseti32imm:$offset operand
// printed right after $src), and moves the `v4.` prefix and the
// `{{$dst1, $dst2, $dst3, $dst4}}, [$src]` operand text from TyStr into the
// multiclass.
28202798multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2821- def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2822- regclass:$dst4), (ins Int32Regs:$src),
2823- !strconcat("ld.global.nc.", TyStr), []>;
2824- def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2825- regclass:$dst4), (ins Int64Regs:$src),
2826- !strconcat("ld.global.nc.", TyStr), []>;
28272799 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
28282800 regclass:$dst4), (ins MEMri:$src),
2829- !strconcat( "ld.global.nc.", TyStr) , []>;
2801+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
28302802 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
28312803 regclass:$dst4), (ins MEMri64:$src),
2832- !strconcat( "ld.global.nc.", TyStr) , []>;
2833- def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2834- regclass:$dst4), (ins imemAny:$src),
2835- !strconcat( "ld.global.nc.", TyStr) , []>;
2804+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];" , []>;
2805+ def _asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2806+ regclass:$dst4), (ins imemAny:$src, Offseti32imm:$offset ),
2807+ "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src$offset];" , []>;
28362808}
28372809
28382810// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector LDG instantiations. The removed two-line forms passed the vector
// prefix and operand text via TyStr; the added one-line forms pass only the
// element type suffix.
// NOTE(review): unlike the LDU v4 list, no b16/b32 (v4f16 / v4f16x2)
// instantiations appear here, before or after the change - confirm this is
// intentional.
2839- defm INT_PTX_LDG_G_v2i8_ELE
2840- : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2841- defm INT_PTX_LDG_G_v2i16_ELE
2842- : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2843- defm INT_PTX_LDG_G_v2i32_ELE
2844- : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2845- defm INT_PTX_LDG_G_v2f32_ELE
2846- : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2847- defm INT_PTX_LDG_G_v2i64_ELE
2848- : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2849- defm INT_PTX_LDG_G_v2f64_ELE
2850- : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2851- defm INT_PTX_LDG_G_v4i8_ELE
2852- : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2853- defm INT_PTX_LDG_G_v4i16_ELE
2854- : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2855- defm INT_PTX_LDG_G_v4i32_ELE
2856- : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2857- defm INT_PTX_LDG_G_v4f32_ELE
2858- : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
2811+ defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"u8", Int16Regs>;
2812+ defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"u16", Int16Regs>;
2813+ defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"u32", Int32Regs>;
2814+ defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"f32", Float32Regs>;
2815+ defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"u64", Int64Regs>;
2816+ defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"f64", Float64Regs>;
2817+
2818+ defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"u8", Int16Regs>;
2819+ defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"u16", Int16Regs>;
2820+ defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32", Int32Regs>;
2821+ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
28592822
28602823
28612824multiclass NG_TO_G<string Str> {
0 commit comments