@@ -5024,8 +5024,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in {
50245024def EXIT : NullaryInst<"exit", int_nvvm_exit>;
50255025
50265026// Tcgen05 intrinsics
5027- let isConvergent = true, Predicates = [hasTcgen05Instructions] in {
5028-
5027+ let isConvergent = true in {
5028+ let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {
50295029multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> {
50305030 def "" : BasicNVPTXInst<(outs),
50315031 (ins ADDR:$dst, B32:$ncols),
@@ -5077,15 +5077,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">;
50775077defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">;
50785078defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">;
50795079
5080- multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
5081- def "" : BasicNVPTXInst<(outs),
5082- (ins ADDR:$tmem_addr),
5083- "tcgen05.shift.cta_group::" # num # ".down",
5084- [(Intr addr:$tmem_addr)]>;
5085- }
5086- defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
5087- defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
5088-
50895080multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> {
50905081 defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16");
50915082 defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret;
@@ -5116,9 +5107,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in {
51165107 defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">;
51175108 defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">;
51185109}
5110+ } // Predicates
5111+
5112+ let Predicates = [hasTcgen05ShiftSupport] in {
5113+ multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
5114+ def "" : BasicNVPTXInst<(outs),
5115+ (ins ADDR:$tmem_addr),
5116+ "tcgen05.shift.cta_group::" # num # ".down",
5117+ [(Intr addr:$tmem_addr)]>;
5118+ }
5119+ defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
5120+ defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
5121+ } // Predicates
5122+
51195123} // isConvergent
51205124
5121- let hasSideEffects = 1, Predicates = [hasTcgen05Instructions ] in {
5125+ let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport"> ] in {
51225126
51235127 def tcgen05_fence_before_thread_sync: NullaryInst<
51245128 "tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>;
@@ -5152,8 +5156,7 @@ class TCGEN05_LDST_REGINFO<int Veclen> {
51525156//
51535157
51545158class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
5155- NVPTXInst<(outs), (ins), "?", []>,
5156- Requires<[hasTcgen05Instructions]> {
5159+ NVPTXInst<(outs), (ins), "?", []> {
51575160
51585161 TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
51595162 NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
@@ -5177,8 +5180,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
51775180//
51785181
51795182class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> :
5180- NVPTXInst<(outs), (ins), "?", []>,
5181- Requires<[hasTcgen05Instructions]> {
5183+ NVPTXInst<(outs), (ins), "?", []> {
51825184
51835185 TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
51845186 NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
0 commit comments