Skip to content

Commit 7e3815b

Browse files
committed
add prefetch tensormap variant
1 parent e34e021 commit 7e3815b

File tree

4 files changed

+50
-1
lines changed

4 files changed

+50
-1
lines changed

llvm/docs/NVPTXUsage.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,9 @@ Syntax:
971971
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
972972
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
973973
974+
declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
975+
declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
976+
974977
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
975978
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
976979
@@ -983,7 +986,10 @@ The '``@llvm.nvvm.prefetch.*``' and '``@llvm.nvvm.prefetchu.*``' intrinsic
983986
correspond to the '``prefetch.*``;' and '``prefetchu.*``' family of PTX instructions.
984987
The '``prefetch.*``' instructions bring the cache line containing the
985988
specified address in global or local memory address space into the
986-
specified cache level (L1 or L2). The '`prefetchu.*``' instruction brings the cache line
989+
specified cache level (L1 or L2). If the '``.tensormap``' qualifier is specified then the
990+
prefetch instruction brings the cache line containing the specified address in the
991+
'``.const``' or '``.param memory``' state space for subsequent use by the '``cp.async.bulk.tensor``'
992+
instruction.The '`prefetchu.*``' instruction brings the cache line
987993
containing the specified generic address into the specified uniform cache level.
988994
If no address space is specified, it is assumed to be generic address. The intrinsic
989995
uses and eviction priority which can be accessed by the '``.level::eviction_priority``' modifier.

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@
137137

138138
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
139139
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
140+
def llvm_constant_ptr_ty: LLVMQualPointerType<4>; // (const)ptr
140141
def llvm_local_ptr_ty : LLVMQualPointerType<5>; // (local)ptr
141142
def llvm_tmem_ptr_ty : LLVMQualPointerType<6>; // (tensor memory)ptr
142143
def llvm_shared_cluster_ptr_ty : LLVMQualPointerType<7>; // (shared_cluster)ptr
@@ -2092,6 +2093,9 @@ let IntrProperties = [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<
20922093
def int_nvvm_prefetch_local_ # level : Intrinsic<[], [llvm_local_ptr_ty]>;
20932094
}
20942095

2096+
def int_nvvm_prefetch_tensormap_p0: DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>;
2097+
def int_nvvm_prefetch_tensormap_p4 : DefaultAttrsIntrinsic<[], [llvm_constant_ptr_ty]>;
2098+
20952099
foreach eviction_priority = ["evict_normal", "evict_last"] in
20962100
def int_nvvm_prefetch_global_L2_ # eviction_priority : Intrinsic<[], [llvm_global_ptr_ty]>;
20972101

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,15 @@ def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">;
760760
def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">;
761761
def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
762762
def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;
763+
def PREFETCH_CONST_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr),
764+
"prefetch.const.tensormap",
765+
[(int_nvvm_prefetch_tensormap_p4 addr:$addr)]>,
766+
Requires<[hasPTX<80>, hasSM<90>]>;
767+
768+
def PREFETCH_GENERIC_TENSORMAP : BasicNVPTXInst<(outs), (ins ADDR:$addr),
769+
"prefetch.tensormap",
770+
[(int_nvvm_prefetch_tensormap_p0 addr:$addr)]>,
771+
Requires<[hasPTX<80>, hasSM<90>]>;
763772

764773
def PREFETCH_GLOBAL_L2_EVICT_NORMAL : BasicNVPTXInst<(outs), (ins ADDR:$addr),
765774
"prefetch.global.L2::evict_normal",

llvm/test/CodeGen/NVPTX/prefetch.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
1212
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
1313
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
1414

15+
declare void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
16+
declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
17+
1518
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
1619
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
1720

@@ -67,6 +70,33 @@ define void @prefetch_(ptr %ptr) {
6770
ret void
6871
}
6972

73+
74+
define void @prefetch_generic_tensormap(ptr %ptr) {
75+
; CHECK-PTX64-LABEL: prefetch_generic_tensormap(
76+
; CHECK-PTX64: {
77+
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
78+
; CHECK-PTX64-EMPTY:
79+
; CHECK-PTX64-NEXT: // %bb.0:
80+
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_param_0];
81+
; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1];
82+
; CHECK-PTX64-NEXT: ret;
83+
tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
84+
ret void
85+
}
86+
87+
define void @prefetch_const_tensormap(ptr addrspace(4) %const_ptr) {
88+
; CHECK-PTX64-LABEL: prefetch_const_tensormap(
89+
; CHECK-PTX64: {
90+
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
91+
; CHECK-PTX64-EMPTY:
92+
; CHECK-PTX64-NEXT: // %bb.0:
93+
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_param_0];
94+
; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1];
95+
; CHECK-PTX64-NEXT: ret;
96+
tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
97+
ret void
98+
}
99+
70100
define void @prefetchu_l1(ptr %ptr) {
71101
; CHECK-PTX64-LABEL: prefetchu_l1(
72102
; CHECK-PTX64: {

0 commit comments

Comments
 (0)