@@ -2143,15 +2143,12 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
21432143
21442144class LDU_G<string TyStr, NVPTXRegClass regclass>
21452145 : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
2146- "ldu.global." # TyStr # " \t$result, [$src];",
2147- []>, Requires<[hasLDU]>;
2146+ "ldu.global." # TyStr # " \t$result, [$src];", []>;
21482147
2149- def INT_PTX_LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>;
2150- def INT_PTX_LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>;
2151- def INT_PTX_LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>;
2152- def INT_PTX_LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>;
2153- def INT_PTX_LDU_GLOBAL_f32 : LDU_G<"b32", Float32Regs>;
2154- def INT_PTX_LDU_GLOBAL_f64 : LDU_G<"b64", Float64Regs>;
2148+ def LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>;
2149+ def LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>;
2150+ def LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>;
2151+ def LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>;
21552152
21562153// vector
21572154
@@ -2168,19 +2165,14 @@ class VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass>
21682165 "ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
21692166
21702167
2171- def INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"b8", Int16Regs>;
2172- def INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"b16", Int16Regs>;
2173- def INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"b32", Int32Regs>;
2174- def INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"b32", Float32Regs>;
2175- def INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"b64", Int64Regs>;
2176- def INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"b64", Float64Regs>;
2168+ def LDU_GLOBAL_v2i8 : VLDU_G_ELE_V2<"b8", Int16Regs>;
2169+ def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2<"b16", Int16Regs>;
2170+ def LDU_GLOBAL_v2i32 : VLDU_G_ELE_V2<"b32", Int32Regs>;
2171+ def LDU_GLOBAL_v2i64 : VLDU_G_ELE_V2<"b64", Int64Regs>;
21772172
2178- def INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"b8", Int16Regs>;
2179- def INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2180- def INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2181- def INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2182- def INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2183- def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"b32", Float32Regs>;
2173+ def LDU_GLOBAL_v4i8 : VLDU_G_ELE_V4<"b8", Int16Regs>;
2174+ def LDU_GLOBAL_v4i16 : VLDU_G_ELE_V4<"b16", Int16Regs>;
2175+ def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4<"b32", Int32Regs>;
21842176
21852177
21862178//-----------------------------------
@@ -2191,55 +2183,47 @@ def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"b32", Float32Regs>;
21912183// non-coherent texture cache, and therefore the values read must be read-only
21922184// during the lifetime of the kernel.
21932185
2194- class LDG_G<string TyStr, NVPTXRegClass regclass>
2195- : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
2196- "ld.global.nc." # TyStr # " \t$result, [$src];",
2197- []>, Requires<[hasLDG]>;
2186+ class LDG_G<NVPTXRegClass regclass>
2187+ : NVPTXInst<(outs regclass:$result), (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2188+ "ld.global.nc.${Sign:sign}$fromWidth \t$result, [$src];", []>;
21982189
2199- def INT_PTX_LDG_GLOBAL_i8 : LDG_G<"b8", Int16Regs>;
2200- def INT_PTX_LDG_GLOBAL_i16 : LDG_G<"b16", Int16Regs>;
2201- def INT_PTX_LDG_GLOBAL_i32 : LDG_G<"b32", Int32Regs>;
2202- def INT_PTX_LDG_GLOBAL_i64 : LDG_G<"b64", Int64Regs>;
2203- def INT_PTX_LDG_GLOBAL_f32 : LDG_G<"b32", Float32Regs>;
2204- def INT_PTX_LDG_GLOBAL_f64 : LDG_G<"b64", Float64Regs>;
2190+ def LD_GLOBAL_NC_i8 : LDG_G<Int16Regs>;
2191+ def LD_GLOBAL_NC_i16 : LDG_G<Int16Regs>;
2192+ def LD_GLOBAL_NC_i32 : LDG_G<Int32Regs>;
2193+ def LD_GLOBAL_NC_i64 : LDG_G<Int64Regs>;
22052194
22062195// vector
22072196
22082197// Elementized vector ldg
2209- class VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> :
2198+ class VLDG_G_ELE_V2<NVPTXRegClass regclass> :
22102199 NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2211- (ins ADDR:$src),
2212- "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
2200+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2201+ "ld.global.nc.v2.${Sign:sign}$fromWidth \t{{$dst1, $dst2}}, [$src];", []>;
22132202
22142203
2215- class VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> :
2204+ class VLDG_G_ELE_V4<NVPTXRegClass regclass> :
22162205 NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2217- (ins ADDR:$src),
2218- "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2206+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2207+ "ld.global.nc.v4.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
22192208
2220- class VLDG_G_ELE_V8<string TyStr, NVPTXRegClass regclass> :
2209+ class VLDG_G_ELE_V8<NVPTXRegClass regclass> :
22212210 NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
22222211 regclass:$dst5, regclass:$dst6, regclass:$dst7, regclass:$dst8),
2223- (ins ADDR:$src),
2224- "ld.global.nc.v8." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>;
2212+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2213+ "ld.global.nc.v8.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>;
22252214
22262215// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2227- def INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"b8", Int16Regs>;
2228- def INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"b16", Int16Regs>;
2229- def INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"b32", Int32Regs>;
2230- def INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"b32", Float32Regs>;
2231- def INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"b64", Int64Regs>;
2232- def INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"b64", Float64Regs>;
2233-
2234- def INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"b8", Int16Regs>;
2235- def INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"b16", Int16Regs>;
2236- def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"b32", Int32Regs>;
2237- def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"b32", Float32Regs>;
2238-
2239- def INT_PTX_LDG_G_v4i64_ELE : VLDG_G_ELE_V4<"b64", Int64Regs>;
2240- def INT_PTX_LDG_G_v4f64_ELE : VLDG_G_ELE_V4<"b64", Float64Regs>;
2241- def INT_PTX_LDG_G_v8i32_ELE : VLDG_G_ELE_V8<"b32", Int32Regs>;
2242- def INT_PTX_LDG_G_v8f32_ELE : VLDG_G_ELE_V8<"b32", Float32Regs>;
2216+ def LD_GLOBAL_NC_v2i8 : VLDG_G_ELE_V2<Int16Regs>;
2217+ def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2<Int16Regs>;
2218+ def LD_GLOBAL_NC_v2i32 : VLDG_G_ELE_V2<Int32Regs>;
2219+ def LD_GLOBAL_NC_v2i64 : VLDG_G_ELE_V2<Int64Regs>;
2220+
2221+ def LD_GLOBAL_NC_v4i8 : VLDG_G_ELE_V4<Int16Regs>;
2222+ def LD_GLOBAL_NC_v4i16 : VLDG_G_ELE_V4<Int16Regs>;
2223+ def LD_GLOBAL_NC_v4i32 : VLDG_G_ELE_V4<Int32Regs>;
2224+
2225+ def LD_GLOBAL_NC_v4i64 : VLDG_G_ELE_V4<Int64Regs>;
2226+ def LD_GLOBAL_NC_v8i32 : VLDG_G_ELE_V8<Int32Regs>;
22432227
22442228multiclass NG_TO_G<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
22452229 if Supports32 then
0 commit comments