@@ -5091,8 +5091,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in {
50915091def EXIT : NullaryInst<"exit", int_nvvm_exit>;
50925092
50935093// Tcgen05 intrinsics
5094- let isConvergent = true, Predicates = [hasTcgen05Instructions] in {
5095-
5094+ let isConvergent = true in {
5095+ let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {
50965096multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> {
50975097 def "" : BasicNVPTXInst<(outs),
50985098 (ins ADDR:$dst, B32:$ncols),
@@ -5144,15 +5144,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">;
51445144defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">;
51455145defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">;
51465146
5147- multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
5148- def "" : BasicNVPTXInst<(outs),
5149- (ins ADDR:$tmem_addr),
5150- "tcgen05.shift.cta_group::" # num # ".down",
5151- [(Intr addr:$tmem_addr)]>;
5152- }
5153- defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
5154- defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
5155-
51565147multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> {
51575148 defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16");
51585149 defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret;
@@ -5183,9 +5174,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in {
51835174 defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">;
51845175 defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">;
51855176}
5177+ } // Predicates
5178+
5179+ let Predicates = [hasTcgen05ShiftSupport] in {
5180+ multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
5181+ def "" : BasicNVPTXInst<(outs),
5182+ (ins ADDR:$tmem_addr),
5183+ "tcgen05.shift.cta_group::" # num # ".down",
5184+ [(Intr addr:$tmem_addr)]>;
5185+ }
5186+ defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
5187+ defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
5188+ } // Predicates
5189+
51865190} // isConvergent
51875191
5188- let hasSideEffects = 1, Predicates = [hasTcgen05Instructions ] in {
5192+ let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport"> ] in {
51895193
51905194 def tcgen05_fence_before_thread_sync: NullaryInst<
51915195 "tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>;
@@ -5219,8 +5223,7 @@ class TCGEN05_LDST_REGINFO<int Veclen> {
52195223//
52205224
52215225class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
5222- NVPTXInst<(outs), (ins), "?", []>,
5223- Requires<[hasTcgen05Instructions]> {
5226+ NVPTXInst<(outs), (ins), "?", []> {
52245227
52255228 TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
52265229 NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
@@ -5244,8 +5247,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
52445247//
52455248
52465249class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> :
5247- NVPTXInst<(outs), (ins), "?", []>,
5248- Requires<[hasTcgen05Instructions]> {
5250+ NVPTXInst<(outs), (ins), "?", []> {
52495251
52505252 TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
52515253 NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
0 commit comments