127127// * llvm.nvvm.atomic.load.inc.32 --> atomicrmw uinc_wrap
128128// * llvm.nvvm.atomic.load.dec.32 --> atomicrmw udec_wrap
129129
130- def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
131- def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
132- def llvm_local_ptr_ty : LLVMQualPointerType<5>; // (local)ptr
133- def llvm_tmem_ptr_ty : LLVMQualPointerType<6>; // (tensor memory)ptr
134- def llvm_dshared_ptr_ty : LLVMQualPointerType<7>; // (dshared )ptr
130+ def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
131+ def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
132+ def llvm_local_ptr_ty : LLVMQualPointerType<5>; // (local)ptr
133+ def llvm_tmem_ptr_ty : LLVMQualPointerType<6>; // (tensor memory)ptr
134+ def llvm_shared_cluster_ptr_ty : LLVMQualPointerType<7>; // (shared_cluster)ptr
135135
136136//
137137// MISC
@@ -692,15 +692,15 @@ class CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, string mode> {
692692 list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
693693 list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
694694 list<LLVMType> ArgsTy = !listconcat(
695- [llvm_dshared_ptr_ty , // dst_smem_ptr
696- llvm_shared_ptr_ty, // mbarrier_smem_ptr
697- llvm_ptr_ty], // tensormap_ptr
698- TensorDimsTy, // actual tensor dims
699- Im2ColOffsetsTy, // im2col offsets
700- [llvm_i16_ty, // cta_mask
701- llvm_i64_ty, // cache_hint
702- llvm_i1_ty, // Flag for cta_mask
703- llvm_i1_ty] // Flag for cache_hint
695+ [llvm_shared_cluster_ptr_ty , // dst_shared_cluster_ptr
696+ llvm_shared_ptr_ty, // mbarrier_smem_ptr
697+ llvm_ptr_ty], // tensormap_ptr
698+ TensorDimsTy, // actual tensor dims
699+ Im2ColOffsetsTy, // im2col offsets
700+ [llvm_i16_ty, // cta_mask
701+ llvm_i64_ty, // cache_hint
702+ llvm_i1_ty, // Flag for cta_mask
703+ llvm_i1_ty] // Flag for cache_hint
704704 );
705705
706706 int TempFlagsStartIdx = !add(dim, 5);
@@ -5119,7 +5119,7 @@ def int_nvvm_mapa
51195119 [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
51205120 "llvm.nvvm.mapa">;
// llvm.nvvm.mapa.shared.cluster: takes a (shared)ptr and an i32 and returns a
// (shared_cluster)ptr, i.e. LLVMQualPointerType<7> (the type formerly named
// llvm_dshared_ptr_ty). Declared IntrNoMem and IntrSpeculatable, with
// NoCapture on argument 0.
51215121def int_nvvm_mapa_shared_cluster
5122- : DefaultAttrsIntrinsic<[llvm_dshared_ptr_ty ], [llvm_shared_ptr_ty, llvm_i32_ty],
5122+ : DefaultAttrsIntrinsic<[llvm_shared_cluster_ptr_ty ], [llvm_shared_ptr_ty, llvm_i32_ty],
51235123 [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
51245124 "llvm.nvvm.mapa.shared.cluster">;
51255125def int_nvvm_getctarank
@@ -5219,14 +5219,14 @@ def int_nvvm_discard_L2 : DefaultAttrsIntrinsic<[],
52195219// From Global to Shared Cluster
52205220def int_nvvm_cp_async_bulk_global_to_shared_cluster
52215221 : DefaultAttrsIntrinsic<[],
5222- [llvm_dshared_ptr_ty , // dst_dsmem_ptr
5223- llvm_shared_ptr_ty, // mbarrier_ptr
5224- llvm_global_ptr_ty, // src_gmem_ptr
5225- llvm_i32_ty, // copy_size
5226- llvm_i16_ty, // cta_mask
5227- llvm_i64_ty, // cache_hint
5228- llvm_i1_ty, // Flag for cta_mask
5229- llvm_i1_ty], // Flag for cache_hint
5222+ [llvm_shared_cluster_ptr_ty , // dst_shared_cluster_ptr
5223+ llvm_shared_ptr_ty, // mbarrier_ptr
5224+ llvm_global_ptr_ty, // src_gmem_ptr
5225+ llvm_i32_ty, // copy_size
5226+ llvm_i16_ty, // cta_mask
5227+ llvm_i64_ty, // cache_hint
5228+ llvm_i1_ty, // Flag for cta_mask
5229+ llvm_i1_ty], // Flag for cache_hint
52305230 [IntrConvergent, IntrArgMemOnly,
52315231 WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
52325232 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
@@ -5236,10 +5236,10 @@ def int_nvvm_cp_async_bulk_global_to_shared_cluster
52365236// From Shared CTA to Shared Cluster
52375237def int_nvvm_cp_async_bulk_shared_cta_to_cluster
52385238 : DefaultAttrsIntrinsic<[],
5239- [llvm_dshared_ptr_ty , // dst_dsmem_ptr
5240- llvm_shared_ptr_ty, // mbarrier_ptr
5241- llvm_shared_ptr_ty, // src_smem_ptr
5242- llvm_i32_ty], // copy_size
5239+ [llvm_shared_cluster_ptr_ty , // dst_shared_cluster_ptr
5240+ llvm_shared_ptr_ty, // mbarrier_ptr
5241+ llvm_shared_ptr_ty, // src_smem_ptr
5242+ llvm_i32_ty], // copy_size
52435243 [IntrConvergent, IntrArgMemOnly,
52445244 WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
52455245 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
0 commit comments