From 015908adee722ed23dcef0daba88caff0de8519f Mon Sep 17 00:00:00 2001 From: abmajumder Date: Thu, 20 Feb 2025 16:38:41 +0530 Subject: [PATCH 1/7] applypriority intrinsics --- llvm/docs/NVPTXUsage.rst | 24 +++++++++++++++++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 10 +++++++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 16 +++++++++++ llvm/test/CodeGen/NVPTX/applypriority.ll | 34 ++++++++++++++++++++++++ 4 files changed, 84 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/applypriority.ll diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 675b458c41e7b..61ae07816bdfd 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -630,6 +630,30 @@ uses and eviction priority which can be accessed by the '``.level::eviction_prio For more information, refer to the PTX ISA ``_. +'``llvm.nvvm.applypriority.*``' +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. code-block:: llvm + + declare void @llvm.nvvm.applypriority.global.L2.evict.normal(ptr addrspace(1) %global_ptr, i64 %size) + declare void @llvm.nvvm.applypriority.L2.evict.normal(ptr %ptr, i64 %size) + +Overview: +""""""""" + +The '``@llvm.nvvm.applypriority.*``' applies the cache eviction priority specified by the +.level::eviction_priority qualifier to the address range [a..a+size) in the specified cache +level. If no state space is specified then Generic Addressing is used. If the specified address +does not fall within the address window of .global state space then the behavior is undefined. +The operand size is an integer constant that specifies the amount of data, in bytes, in the specified cache +level on which the priority is to be applied. The only supported value for the size operand is 128. + +For more information, refer to the PTX ISA +``_. + '``llvm.nvvm.cp.async.bulk.tensor.g2s.tile.[1-5]d``' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index c32bf0318b5d6..eff860bc3a850 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -5043,6 +5043,16 @@ def int_nvvm_prefetch_global_L2_evict_last: Intrinsic<[], [llvm_global_ptr_ty], def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_nvvm_applypriority_global_L2_evict_normal + : Intrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>, + ImmArg>]>; + +def int_nvvm_applypriority_L2_evict_normal + : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>, + ImmArg>]>; + // Intrinsics for Bulk Copy using TMA (non-tensor) // From Global to Shared Cluster diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index ed7963f35a7c7..78e0621fb52d9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -789,6 +789,22 @@ def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr), def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">; +//Applypriority intrinsics +multiclass APPLYPRIORITY_L2_INTRS { + defvar InstName = "applypriority." + # !if(!eq(addr, ""), "", addr # ".") + # "L2::evict_normal"; + + def APPLYPRIORITY_L2 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int64Regs:$size), + InstName # " [$addr], $size;", + [(!cast("int_nvvm_" # !subst("::", "_", !subst(".", "_", InstName))) + i64:$addr, i64:$size)]>, + Requires<[hasPTX<80>, hasSM<90>]>; +} + +defm APPLYPRIORITY_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"">; +defm APPLYPRIORITY_GLOBAL_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"global">; + //----------------------------------- // MBarrier Functions //----------------------------------- diff --git a/llvm/test/CodeGen/NVPTX/applypriority.ll b/llvm/test/CodeGen/NVPTX/applypriority.ll new file mode 100644 index 0000000000000..51998f4d850c2 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/applypriority.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: %if ptxas-12.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} + +target triple = "nvptx64-nvidia-cuda" + +declare void @llvm.nvvm.applypriority.global.L2.evict.normal(ptr addrspace(1) %global_ptr, i64 %size) +declare void @llvm.nvvm.applypriority.L2.evict.normal(ptr %ptr, i64 %size) + +define void @applypriority_global_L2(ptr addrspace(1) %global_ptr, i64 %size) { +; CHECK-PTX64-LABEL: applypriority_global_L2( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [applypriority_global_L2_param_0]; +; CHECK-PTX64-NEXT: applypriority.global.L2::evict_normal [%rd1], 128; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.applypriority.global.L2.evict.normal(ptr addrspace(1) %global_ptr, i64 128) + ret void +} + +define void @applypriority_L2(ptr %ptr, i64 %size) { +; CHECK-PTX64-LABEL: applypriority_L2( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [applypriority_L2_param_0]; +; CHECK-PTX64-NEXT: applypriority.L2::evict_normal [%rd1], 128; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.applypriority.L2.evict.normal(ptr %ptr, i64 128) + ret void +} From 38a8fc12ee8e547f2184536098ad2a0c4cbe3a4c Mon Sep 17 00:00:00 2001 From: abmajumder Date: Thu, 20 Feb 2025 17:05:07 +0530 Subject: [PATCH 2/7] remove multiclass and refine --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 78e0621fb52d9..bd8077e2dd887 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -790,20 +790,15 @@ def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr), def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">; //Applypriority intrinsics -multiclass APPLYPRIORITY_L2_INTRS { - defvar InstName = "applypriority." - # !if(!eq(addr, ""), "", addr # ".") - # "L2::evict_normal"; - - def APPLYPRIORITY_L2 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int64Regs:$size), - InstName # " [$addr], $size;", - [(!cast("int_nvvm_" # !subst("::", "_", !subst(".", "_", InstName))) - i64:$addr, i64:$size)]>, - Requires<[hasPTX<80>, hasSM<90>]>; -} - -defm APPLYPRIORITY_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"">; -defm APPLYPRIORITY_GLOBAL_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"global">; +class APPLYPRIORITY_L2_INTRS : + NVPTXInst<(outs), (ins Int64Regs:$addr, Int64Regs:$size), + InstName # " [$addr], $size;", + [(!cast("int_nvvm_" # !subst("::", "_", !subst(".", "_", InstName))) + i64:$addr, i64:$size)]>, + Requires<[hasPTX<80>, hasSM<90>]>; + +def APPLYPRIORITY_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"applypriority.L2::evict_normal">; +def APPLYPRIORITY_GLOBAL_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"applypriority.global.L2::evict_normal">; //----------------------------------- // MBarrier Functions From 4ebdc2cfa92ca38ec63810e2ef75b0e29d1b6317 Mon Sep 17 00:00:00 2001 From: abmajumder Date: Thu, 20 Feb 2025 17:11:04 +0530 Subject: [PATCH 3/7] refine versions --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index bd8077e2dd887..1ca7d7e87eb08 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -795,7 +795,7 @@ class APPLYPRIORITY_L2_INTRS : InstName # " [$addr], $size;", [(!cast("int_nvvm_" # !subst("::", "_", !subst(".", "_", InstName))) i64:$addr, i64:$size)]>, - Requires<[hasPTX<80>, hasSM<90>]>; + Requires<[hasPTX<74>, hasSM<80>]>; def APPLYPRIORITY_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"applypriority.L2::evict_normal">; def APPLYPRIORITY_GLOBAL_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"applypriority.global.L2::evict_normal">; From f6c258948762829c93d3c54a5fa507d701242bc7 Mon Sep 17 00:00:00 2001 From: abmajumder Date: Thu, 20 Feb 2025 17:13:07 +0530 Subject: [PATCH 4/7] refine versions --- llvm/test/CodeGen/NVPTX/applypriority.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/applypriority.ll b/llvm/test/CodeGen/NVPTX/applypriority.ll index 51998f4d850c2..c4410d7070c5e 100644 --- a/llvm/test/CodeGen/NVPTX/applypriority.ll +++ b/llvm/test/CodeGen/NVPTX/applypriority.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s -; RUN: %if ptxas-12.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| FileCheck --check-prefixes=CHECK-PTX64 %s +; RUN: %if ptxas-12.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| %ptxas-verify -arch=sm_80 %} target triple = "nvptx64-nvidia-cuda" From 2be2be20a9ec42db4394f0aa3c0a30ce41f7d917 Mon Sep 17 00:00:00 2001 From: abmajumder Date: Thu, 20 Feb 2025 17:27:45 +0530 Subject: [PATCH 5/7] refine versions --- llvm/test/CodeGen/NVPTX/applypriority.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/NVPTX/applypriority.ll b/llvm/test/CodeGen/NVPTX/applypriority.ll index c4410d7070c5e..af161d82a25ea 100644 --- a/llvm/test/CodeGen/NVPTX/applypriority.ll +++ b/llvm/test/CodeGen/NVPTX/applypriority.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| FileCheck --check-prefixes=CHECK-PTX64 %s -; RUN: %if ptxas-12.3 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| %ptxas-verify -arch=sm_80 %} +; RUN: %if ptxas-11.4 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx74| %ptxas-verify -arch=sm_80 %} target triple = "nvptx64-nvidia-cuda" From 53fa94de301fd8ae88a5c4cb2e443ec3c8982e6a Mon Sep 17 00:00:00 2001 From: abmajumder Date: Fri, 21 Feb 2025 11:22:03 +0530 Subject: [PATCH 6/7] refine --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 12 ++++++------ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 14 +++++++++----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index eff860bc3a850..12d43594a3df2 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -5044,14 +5044,14 @@ def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly, ReadOnly>, NoCapture>]>; def int_nvvm_applypriority_global_L2_evict_normal - : Intrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>, - ImmArg>]>; + : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>, + ImmArg>]>; def int_nvvm_applypriority_L2_evict_normal - : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], - [IntrArgMemOnly, ReadOnly>, NoCapture>, - ImmArg>]>; + : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>, + ImmArg>]>; // Intrinsics for Bulk Copy using TMA (non-tensor) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1ca7d7e87eb08..83a7e17295f5d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -790,15 +790,19 @@ def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr), def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">; //Applypriority intrinsics -class APPLYPRIORITY_L2_INTRS : +class APPLYPRIORITY_STRINGS { + string InstName = StrJoin<".", ["applypriority", !if(!eq(addr, ""), "", addr) , "L2::evict_normal"]>.ret; + string IntrName = StrJoin<"_", ["int_nvvm_applypriority", !if(!eq(addr, ""), "", addr) , "L2_evict_normal"]>.ret; +} +class APPLYPRIORITY_L2_INTRS : NVPTXInst<(outs), (ins Int64Regs:$addr, Int64Regs:$size), - InstName # " [$addr], $size;", - [(!cast("int_nvvm_" # !subst("::", "_", !subst(".", "_", InstName))) + APPLYPRIORITY_STRINGS.InstName # " [$addr], $size;", + [(!cast(APPLYPRIORITY_STRINGS.IntrName) i64:$addr, i64:$size)]>, Requires<[hasPTX<74>, hasSM<80>]>; -def APPLYPRIORITY_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"applypriority.L2::evict_normal">; -def APPLYPRIORITY_GLOBAL_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"applypriority.global.L2::evict_normal">; +def APPLYPRIORITY_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"">; +def APPLYPRIORITY_GLOBAL_L2_EVICT_NORMAL : APPLYPRIORITY_L2_INTRS<"global">; //----------------------------------- // MBarrier Functions From f45366478549260827155474e70663fe98e81027 Mon Sep 17 00:00:00 2001 From: abmajumder Date: Fri, 21 Feb 2025 16:20:14 +0530 Subject: [PATCH 7/7] refine --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 83a7e17295f5d..1fd95abfab217 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -790,14 +790,10 @@ def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr), def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">; //Applypriority intrinsics -class APPLYPRIORITY_STRINGS { - string InstName = StrJoin<".", ["applypriority", !if(!eq(addr, ""), "", addr) , "L2::evict_normal"]>.ret; - string IntrName = StrJoin<"_", ["int_nvvm_applypriority", !if(!eq(addr, ""), "", addr) , "L2_evict_normal"]>.ret; -} class APPLYPRIORITY_L2_INTRS : NVPTXInst<(outs), (ins Int64Regs:$addr, Int64Regs:$size), - APPLYPRIORITY_STRINGS.InstName # " [$addr], $size;", - [(!cast(APPLYPRIORITY_STRINGS.IntrName) + StrJoin<".", ["applypriority", addr , "L2::evict_normal"]>.ret # " [$addr], $size;", + [(!cast(StrJoin<"_", ["int_nvvm_applypriority", addr , "L2_evict_normal"]>.ret) i64:$addr, i64:$size)]>, Requires<[hasPTX<74>, hasSM<80>]>;