From be0d3afdfed00ea7964bb04039e5ac1ea4441987 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 17 Sep 2025 12:09:39 +0530 Subject: [PATCH 1/9] pattern rewriter fix --- .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 7 +- .../CodeGen/NVPTX/prefetch-inferas-test.ll | 45 +++++++- llvm/test/CodeGen/NVPTX/prefetch.ll | 43 ++++++- llvm/test/CodeGen/NVPTX/prefetch.s | 105 ++++++++++++++++++ 4 files changed, 194 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/prefetch.s diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index f4f89613b358d..b920da0d04203 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -588,10 +588,11 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, return ConstantInt::get(II->getType(), *R); return nullptr; } - case Intrinsic::nvvm_prefetch_tensormap: { - IRBuilder<> Builder(II); - return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap, + const unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST || NewAS == NVPTXAS::ADDRESS_SPACE_PARAM) + return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap, NewV); + return nullptr; } } return nullptr; diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll index bc67471209bf8..ed625876869cc 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll @@ -1,6 +1,8 @@ ; RUN: opt < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix=INFER ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s --check-prefix=PTX ; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %} +; RUN: not llc -march=nvptx64 %s -o - 2>&1 | FileCheck %s --check-prefix=ERR +; XFAIL: * target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-unknown-unknown" @@ -11,7 +13,6 @@ target triple = "nvptx64-unknown-unknown" define void @test_infer_const_from_cast() { ; INFER-LABEL: @test_infer_const_from_cast ; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap) -; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap) ; PTX-LABEL: .visible .func test_infer_const_from_cast( ; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap; ; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; @@ -69,7 +70,47 @@ entry: %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4) %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr - call void @llvm.nvvm.prefetch.tensormap(ptr %cast3) + call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3) + ret void +} + +; Kernel Function Test +; Cast from Const space to Generic +define ptx_kernel void @test_const_to_generic_cast_kernel(ptr addrspace(4) %const_ptr) { +; INFER-LABEL: @test_const_to_generic_cast_kernel +; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) +; PTX-LABEL: .visible .entry test_const_to_generic_cast_kernel( +; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}]; +entry: + %cast = addrspacecast ptr addrspace(4) %const_ptr to ptr + call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast) + ret void +} + +; Kernel Function Test +; Multiple casts in sequence +define ptx_kernel void @test_infer_through_multiple_casts_kernel() { +; INFER-LABEL: @test_infer_through_multiple_casts_kernel +; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap) +; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel( +; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap; +; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; PTX: prefetch.tensormap [%rd{{[0-9]+}}]; +entry: + %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr + %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4) + %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr + call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3) + ret void +} + + +; Negative test case for global to generic addrspace cast +define void @test_global_to_generic_cast(ptr addrspace(1) %global_ptr) { +; ERR: unsupported prefetch address space cast +entry: + %cast = addrspacecast ptr addrspace(1) %global_ptr to ptr + call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast) ret void } diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index a1c5ec8f50a6b..0b1f0fdd5e85f 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -121,4 +121,45 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) { ; CHECK-PTX64-NEXT: ret; tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr) ret void -} \ No newline at end of file +} + +define ptx_kernel void @prefetch_tensormap_kernel(ptr %ptr) { +; CHECK-PTX64-LABEL: prefetch_tensormap_kernel( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_tensormap_kernel_param_0]; +; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1]; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) + ret void +} + +define ptx_kernel void @prefetch_const_tensormap_kernel(ptr addrspace(4) %const_ptr) { +; CHECK-PTX64-LABEL: prefetch_const_tensormap_kernel( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_kernel_param_0]; +; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1]; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) + ret void +} + +define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) { +; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel( +; CHECK-PTX64: { +; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK-PTX64-EMPTY: +; CHECK-PTX64-NEXT: // %bb.0: +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0]; +; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1]; +; CHECK-PTX64-NEXT: ret; + tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr) + ret void +} + + diff --git a/llvm/test/CodeGen/NVPTX/prefetch.s b/llvm/test/CodeGen/NVPTX/prefetch.s new file mode 100644 index 0000000000000..31d0ac68a1472 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/prefetch.s @@ -0,0 +1,105 @@ +// +// Generated by LLVM NVPTX Back-End +// + +.version 8.0 +.target sm_90a +.address_size 64 + + // .globl prefetch_local // -- Begin function prefetch_local + // @prefetch_local +.visible .func prefetch_local( + .param .b64 prefetch_local_param_0 +) +{ + .reg .b64 %rd<2>; + +// %bb.0: + ld.param.b64 %rd1, [prefetch_local_param_0]; + prefetch.local.L1 [%rd1]; + prefetch.local.L2 [%rd1]; + ret; + // -- End function +} + // .globl prefetch_global // -- Begin function prefetch_global +.visible .func prefetch_global( + .param .b64 prefetch_global_param_0 +) // @prefetch_global +{ + .reg .b64 %rd<2>; + +// %bb.0: + ld.param.b64 %rd1, [prefetch_global_param_0]; + prefetch.global.L1 [%rd1]; + prefetch.global.L2 [%rd1]; + prefetch.global.L2::evict_normal [%rd1]; + prefetch.global.L2::evict_last [%rd1]; + ret; + // -- End function +} + // .globl prefetch_ // -- Begin function prefetch_ +.visible .func prefetch_( + .param .b64 prefetch__param_0 +) // @prefetch_ +{ + .reg .b64 %rd<2>; + +// %bb.0: + ld.param.b64 %rd1, [prefetch__param_0]; + prefetch.L1 [%rd1]; + prefetch.L2 [%rd1]; + ret; + // -- End function +} + // .globl prefetchu_l1 // -- Begin function prefetchu_l1 +.visible .func prefetchu_l1( + .param .b64 prefetchu_l1_param_0 +) // @prefetchu_l1 +{ + .reg .b64 %rd<2>; + +// %bb.0: + ld.param.b64 %rd1, [prefetchu_l1_param_0]; + prefetchu.L1 [%rd1]; + ret; + // -- End function +} + // .globl prefetch_tensormap // -- Begin function prefetch_tensormap +.visible .func prefetch_tensormap( + .param .b64 prefetch_tensormap_param_0 +) // @prefetch_tensormap +{ + .reg .b64 %rd<2>; + +// %bb.0: + ld.param.b64 %rd1, [prefetch_tensormap_param_0]; + prefetch.tensormap [%rd1]; + ret; + // -- End function +} + // .globl prefetch_const_tensormap // -- Begin function prefetch_const_tensormap +.visible .func prefetch_const_tensormap( + .param .b64 prefetch_const_tensormap_param_0 +) // @prefetch_const_tensormap +{ + .reg .b64 %rd<2>; + +// %bb.0: + ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; + prefetch.const.tensormap [%rd1]; + ret; + // -- End function +} + // .globl prefetch_param_tensormap // -- Begin function prefetch_param_tensormap +.visible .func prefetch_param_tensormap( + .param .b64 prefetch_param_tensormap_param_0 +) // @prefetch_param_tensormap +{ + .reg .b64 %rd<2>; + +// %bb.0: + ld.param.b64 %rd1, [prefetch_param_tensormap_param_0]; + prefetch.param.tensormap [%rd1]; + ret; + // -- End function +} From 32bf60a241dae7482a94d7cbc60777378313d930 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 17 Sep 2025 12:19:02 +0530 Subject: [PATCH 2/9] remove s --- llvm/test/CodeGen/NVPTX/prefetch.s | 105 ----------------------------- 1 file changed, 105 deletions(-) delete mode 100644 llvm/test/CodeGen/NVPTX/prefetch.s diff --git a/llvm/test/CodeGen/NVPTX/prefetch.s b/llvm/test/CodeGen/NVPTX/prefetch.s deleted file mode 100644 index 31d0ac68a1472..0000000000000 --- a/llvm/test/CodeGen/NVPTX/prefetch.s +++ /dev/null @@ -1,105 +0,0 @@ -// -// Generated by LLVM NVPTX Back-End -// - -.version 8.0 -.target sm_90a -.address_size 64 - - // .globl prefetch_local // -- Begin function prefetch_local - // @prefetch_local -.visible .func prefetch_local( - .param .b64 prefetch_local_param_0 -) -{ - .reg .b64 %rd<2>; - -// %bb.0: - ld.param.b64 %rd1, [prefetch_local_param_0]; - prefetch.local.L1 [%rd1]; - prefetch.local.L2 [%rd1]; - ret; - // -- End function -} - // .globl prefetch_global // -- Begin function prefetch_global -.visible .func prefetch_global( - .param .b64 prefetch_global_param_0 -) // @prefetch_global -{ - .reg .b64 %rd<2>; - -// %bb.0: - ld.param.b64 %rd1, [prefetch_global_param_0]; - prefetch.global.L1 [%rd1]; - prefetch.global.L2 [%rd1]; - prefetch.global.L2::evict_normal [%rd1]; - prefetch.global.L2::evict_last [%rd1]; - ret; - // -- End function -} - // .globl prefetch_ // -- Begin function prefetch_ -.visible .func prefetch_( - .param .b64 prefetch__param_0 -) // @prefetch_ -{ - .reg .b64 %rd<2>; - -// %bb.0: - ld.param.b64 %rd1, [prefetch__param_0]; - prefetch.L1 [%rd1]; - prefetch.L2 [%rd1]; - ret; - // -- End function -} - // .globl prefetchu_l1 // -- Begin function prefetchu_l1 -.visible .func prefetchu_l1( - .param .b64 prefetchu_l1_param_0 -) // @prefetchu_l1 -{ - .reg .b64 %rd<2>; - -// %bb.0: - ld.param.b64 %rd1, [prefetchu_l1_param_0]; - prefetchu.L1 [%rd1]; - ret; - // -- End function -} - // .globl prefetch_tensormap // -- Begin function prefetch_tensormap -.visible .func prefetch_tensormap( - .param .b64 prefetch_tensormap_param_0 -) // @prefetch_tensormap -{ - .reg .b64 %rd<2>; - -// %bb.0: - ld.param.b64 %rd1, [prefetch_tensormap_param_0]; - prefetch.tensormap [%rd1]; - ret; - // -- End function -} - // .globl prefetch_const_tensormap // -- Begin function prefetch_const_tensormap -.visible .func prefetch_const_tensormap( - .param .b64 prefetch_const_tensormap_param_0 -) // @prefetch_const_tensormap -{ - .reg .b64 %rd<2>; - -// %bb.0: - ld.param.b64 %rd1, [prefetch_const_tensormap_param_0]; - prefetch.const.tensormap [%rd1]; - ret; - // -- End function -} - // .globl prefetch_param_tensormap // -- Begin function prefetch_param_tensormap -.visible .func prefetch_param_tensormap( - .param .b64 prefetch_param_tensormap_param_0 -) // @prefetch_param_tensormap -{ - .reg .b64 %rd<2>; - -// %bb.0: - ld.param.b64 %rd1, [prefetch_param_tensormap_param_0]; - prefetch.param.tensormap [%rd1]; - ret; - // -- End function -} From bcf73651b6544059dd249fad69a8f7f365a5df94 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Wed, 17 Sep 2025 12:33:00 +0530 Subject: [PATCH 3/9] typos and format --- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index b920da0d04203..b5bf72e45038a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -588,10 +588,13 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, return ConstantInt::get(II->getType(), *R); return nullptr; } - const unsigned NewAS = NewV->getType()->getPointerAddressSpace(); - if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST || NewAS == NVPTXAS::ADDRESS_SPACE_PARAM) + case Intrinsic::nvvm_prefetch_tensormap: { + IRBuilder<> Builder(II); + const unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST || + NewAS == NVPTXAS::ADDRESS_SPACE_PARAM) return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap, - NewV); + NewV); return nullptr; } } From 789cd0f2e3b2c9f4febf74d2dc1077a0219b05bd Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Sat, 20 Sep 2025 14:34:07 +0530 Subject: [PATCH 4/9] refine tests --- .../CodeGen/NVPTX/prefetch-inferas-test.ll | 25 ++++++++++--------- llvm/test/CodeGen/NVPTX/prefetch.ll | 6 ++--- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll index ed625876869cc..66fb181c4f5fc 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll @@ -1,8 +1,6 @@ ; RUN: opt < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix=INFER ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s --check-prefix=PTX ; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %} -; RUN: not llc -march=nvptx64 %s -o - 2>&1 | FileCheck %s --check-prefix=ERR -; XFAIL: * target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-unknown-unknown" @@ -87,6 +85,19 @@ entry: ret void } +; Kernel Function Test +; Cast from Param space to Generic +define ptx_kernel void @test_param_to_generic_cast_kernel(ptr addrspace(101) %param_ptr) { +; INFER-LABEL: @test_param_to_generic_cast_kernel +; INFER: call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr) +; PTX-LABEL: .visible .entry test_param_to_generic_cast_kernel( +; PTX: prefetch.param.tensormap [%rd{{[0-9]+}}]; +entry: + %cast = addrspacecast ptr addrspace(101) %param_ptr to ptr + call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast) + ret void +} + ; Kernel Function Test ; Multiple casts in sequence define ptx_kernel void @test_infer_through_multiple_casts_kernel() { @@ -104,16 +115,6 @@ entry: ret void } - -; Negative test case for global to generic addrspace cast -define void @test_global_to_generic_cast(ptr addrspace(1) %global_ptr) { -; ERR: unsupported prefetch address space cast -entry: - %cast = addrspacecast ptr addrspace(1) %global_ptr to ptr - call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast) - ret void -} - declare void @llvm.nvvm.prefetch.tensormap.p0(ptr) declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4)) declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101)) diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index 0b1f0fdd5e85f..9bdb2d6bc78c9 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -123,13 +123,13 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) { ret void } -define ptx_kernel void @prefetch_tensormap_kernel(ptr %ptr) { -; CHECK-PTX64-LABEL: prefetch_tensormap_kernel( +define ptx_kernel void @prefetch_generic_tensormap_kernel(ptr %ptr) { +; CHECK-PTX64-LABEL: prefetch_generic_tensormap_kernel( ; CHECK-PTX64: { ; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK-PTX64-EMPTY: ; CHECK-PTX64-NEXT: // %bb.0: -; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_tensormap_kernel_param_0]; +; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_kernel_param_0]; ; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1]; ; CHECK-PTX64-NEXT: ret; tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr) From b7c60350eb94e69de81fc73f6538208c5b102b29 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 23 Sep 2025 14:02:14 +0530 Subject: [PATCH 5/9] add gridconst --- .../CodeGen/NVPTX/prefetch-inferas-test.ll | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll index 66fb181c4f5fc..1518dfe905d95 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll @@ -115,6 +115,46 @@ entry: ret void } + +define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %const_ptr) { +; INFER-LABEL: @prefetch_grid_const_tensormap( +; INFER-SAME: ptr byval([64 x i8]) align 64 "nvvm.grid_constant" [[CONST_PTR:%.*]]) +; INFER: [[CWRAP:%.*]] = call align 64 ptr addrspace(4) @llvm.nvvm.internal.addrspace.wrap.p4.p0(ptr [[CONST_PTR]]) +; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) [[CWRAP]]) +; INFER: ret void + +; PTX-LABEL: .visible .entry prefetch_grid_const_tensormap( +; PTX: mov.b64 %rd{{[0-9]+}}, prefetch_grid_const_tensormap_param_0; +; PTX: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}]; +; PTX: ret; + +entry: + %cwrap = call align 64 ptr addrspace(4) @llvm.nvvm.internal.addrspace.wrap.p4.p0(ptr %const_ptr) + call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %cwrap) + ret void +} + +define ptx_kernel void @prefetch_grid_param_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %param_ptr) { +; INFER-LABEL: @prefetch_grid_param_tensormap( +; INFER-SAME: ptr byval([64 x i8]) align 64 "nvvm.grid_constant" [[PARAM_PTR:%.*]]) +; INFER: [[CWRAP:%.*]] = call align 64 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAM_PTR]]) +; INFER: call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) [[CWRAP]]) +; INFER: ret void + +; PTX-LABEL: .visible .entry prefetch_grid_param_tensormap( +; PTX: mov.b64 %rd{{[0-9]+}}, prefetch_grid_param_tensormap_param_0; +; PTX: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; PTX: prefetch.param.tensormap [%rd{{[0-9]+}}]; +; PTX: ret; + +entry: + %cwrap = call align 64 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr %param_ptr) + call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %cwrap) + ret void +} + + declare void @llvm.nvvm.prefetch.tensormap.p0(ptr) declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4)) declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101)) From 65fa57676ef638363e6be39778e3fd7553a7dbe1 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 23 Sep 2025 16:46:29 +0530 Subject: [PATCH 6/9] refresh --- .../CodeGen/NVPTX/prefetch-inferas-test.ll | 40 ------------------- llvm/test/CodeGen/NVPTX/prefetch.ll | 11 +++++ 2 files changed, 11 insertions(+), 40 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll index 1518dfe905d95..66fb181c4f5fc 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll @@ -115,46 +115,6 @@ entry: ret void } - -define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %const_ptr) { -; INFER-LABEL: @prefetch_grid_const_tensormap( -; INFER-SAME: ptr byval([64 x i8]) align 64 "nvvm.grid_constant" [[CONST_PTR:%.*]]) -; INFER: [[CWRAP:%.*]] = call align 64 ptr addrspace(4) @llvm.nvvm.internal.addrspace.wrap.p4.p0(ptr [[CONST_PTR]]) -; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) [[CWRAP]]) -; INFER: ret void - -; PTX-LABEL: .visible .entry prefetch_grid_const_tensormap( -; PTX: mov.b64 %rd{{[0-9]+}}, prefetch_grid_const_tensormap_param_0; -; PTX: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; -; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}]; -; PTX: ret; - -entry: - %cwrap = call align 64 ptr addrspace(4) @llvm.nvvm.internal.addrspace.wrap.p4.p0(ptr %const_ptr) - call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %cwrap) - ret void -} - -define ptx_kernel void @prefetch_grid_param_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %param_ptr) { -; INFER-LABEL: @prefetch_grid_param_tensormap( -; INFER-SAME: ptr byval([64 x i8]) align 64 "nvvm.grid_constant" [[PARAM_PTR:%.*]]) -; INFER: [[CWRAP:%.*]] = call align 64 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAM_PTR]]) -; INFER: call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) [[CWRAP]]) -; INFER: ret void - -; PTX-LABEL: .visible .entry prefetch_grid_param_tensormap( -; PTX: mov.b64 %rd{{[0-9]+}}, prefetch_grid_param_tensormap_param_0; -; PTX: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; -; PTX: prefetch.param.tensormap [%rd{{[0-9]+}}]; -; PTX: ret; - -entry: - %cwrap = call align 64 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr %param_ptr) - call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %cwrap) - ret void -} - - declare void @llvm.nvvm.prefetch.tensormap.p0(ptr) declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4)) declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101)) diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index 9bdb2d6bc78c9..e27215e78d586 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -162,4 +162,15 @@ define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %para ret void } +define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %ptr) { +; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap( +; CHECK-PTX64: mov.b64 %rd{{[0-9]+}}, prefetch_grid_const_tensormap_param_0; +; CHECK-PTX64: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK-PTX64: prefetch.tensormap [%rd{{[0-9]+}}]; +; CHECK-PTX64: ret; + +entry: + call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr) + ret void +} From b12b04768a6adabf7a863420b055a1393d6f5b28 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 23 Sep 2025 16:50:11 +0530 Subject: [PATCH 7/9] format --- llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll | 2 -- llvm/test/CodeGen/NVPTX/prefetch.ll | 11 +++++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll index 66fb181c4f5fc..d2f2bb4325d73 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll @@ -118,5 +118,3 @@ entry: declare void @llvm.nvvm.prefetch.tensormap.p0(ptr) declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4)) declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101)) - - diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index e27215e78d586..87d1e79193fa0 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -163,14 +163,13 @@ define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %para } define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %ptr) { -; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap( -; CHECK-PTX64: mov.b64 %rd{{[0-9]+}}, prefetch_grid_const_tensormap_param_0; -; CHECK-PTX64: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK-PTX64: prefetch.tensormap [%rd{{[0-9]+}}]; -; CHECK-PTX64: ret; +; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap( +; CHECK-PTX64: mov.b64 %rd{{[0-9]+}}, prefetch_grid_const_tensormap_param_0; +; CHECK-PTX64: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; +; CHECK-PTX64: prefetch.tensormap [%rd{{[0-9]+}}]; +; CHECK-PTX64: ret; entry: call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr) ret void } - From 33840a877d5d72bbb888f9dbc4fab76e1c698786 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 23 Sep 2025 17:18:53 +0530 Subject: [PATCH 8/9] constant pointers passed as entry function parameter cannot use cvta.const --- llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll | 13 ------------- llvm/test/CodeGen/NVPTX/prefetch.ll | 13 ------------- 2 files changed, 26 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll index d2f2bb4325d73..32b55a38e55ef 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll @@ -72,19 +72,6 @@ entry: ret void } -; Kernel Function Test -; Cast from Const space to Generic -define ptx_kernel void @test_const_to_generic_cast_kernel(ptr addrspace(4) %const_ptr) { -; INFER-LABEL: @test_const_to_generic_cast_kernel -; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) -; PTX-LABEL: .visible .entry test_const_to_generic_cast_kernel( -; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}]; -entry: - %cast = addrspacecast ptr addrspace(4) %const_ptr to ptr - call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast) - ret void -} - ; Kernel Function Test ; Cast from Param space to Generic define ptx_kernel void @test_param_to_generic_cast_kernel(ptr addrspace(101) %param_ptr) { diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index 87d1e79193fa0..b0967df2d2f96 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -136,19 +136,6 @@ define ptx_kernel void @prefetch_generic_tensormap_kernel(ptr %ptr) { ret void } -define ptx_kernel void @prefetch_const_tensormap_kernel(ptr addrspace(4) %const_ptr) { -; CHECK-PTX64-LABEL: prefetch_const_tensormap_kernel( -; CHECK-PTX64: { -; CHECK-PTX64-NEXT: .reg .b64 %rd<2>; -; CHECK-PTX64-EMPTY: -; CHECK-PTX64-NEXT: // %bb.0: -; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_kernel_param_0]; -; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1]; -; CHECK-PTX64-NEXT: ret; - tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr) - ret void -} - define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) { ; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel( ; CHECK-PTX64: { From a6d16eb5c8db9cf04c1676c905744914804634d3 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder Date: Tue, 23 Sep 2025 20:18:37 +0530 Subject: [PATCH 9/9] refresh --- llvm/test/CodeGen/NVPTX/prefetch.ll | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll index b0967df2d2f96..c0489cc6fd73a 100644 --- a/llvm/test/CodeGen/NVPTX/prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/prefetch.ll @@ -151,9 +151,7 @@ define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %para define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %ptr) { ; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap( -; CHECK-PTX64: mov.b64 %rd{{[0-9]+}}, prefetch_grid_const_tensormap_param_0; -; CHECK-PTX64: cvta.param.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK-PTX64: prefetch.tensormap [%rd{{[0-9]+}}]; +; CHECK-PTX64: prefetch.tensormap [%{{(SP|rd[0-9]+).*}}]; ; CHECK-PTX64: ret; entry: