[NVPTX] prefetch.tensormap pattern rewriter fix #159253
@llvm/pr-subscribers-backend-nvptx

Author: Abhilash Majumder (abhilash1910)

Changes

Context: Highlighted from #156830, there is an ISel lowering issue in the NVPTX backend for the prefetch.tensormap intrinsic, caused by an unguarded pattern rewrite during the infer-address-spaces pass. For ptx_kernel-attributed functions, the rewriter for prefetch.tensormap intrinsics cannot keep the pointer in the generic (addrspace 0) memory space, as it implicitly gets allocated to the global address space. This causes an ISel failure, which this PR fixes. It should not involve any MLIR op changes, since the failure is only in the backend. A minimal IR illustration of the pattern the new guard rejects follows the first hunk below.

cc @durga4github

FYI: @Wolfram70 @rupprecht @castigli

Full diff: https://github.com/llvm/llvm-project/pull/159253.diff

3 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index f4f89613b358d..b920da0d04203 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -588,10 +588,11 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
return ConstantInt::get(II->getType(), *R);
return nullptr;
}
case Intrinsic::nvvm_prefetch_tensormap: {
IRBuilder<> Builder(II);
- return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
- NewV);
+ const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST ||
+ NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
+ return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
+ NewV);
+ return nullptr;
}
}
return nullptr;
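For illustration, here is a minimal IR sketch of the pattern the new guard rejects (the function name is hypothetical; it mirrors the negative test added below): a global-to-generic addrspacecast feeding the intrinsic. Without the guard, infer-address-spaces would rewrite the call to the unsupported .p1 form.

declare void @llvm.nvvm.prefetch.tensormap.p0(ptr)

define void @example(ptr addrspace(1) %global_ptr) {
entry:
  %cast = addrspacecast ptr addrspace(1) %global_ptr to ptr
  ; Unguarded rewrite: this call becomes @llvm.nvvm.prefetch.tensormap.p1(...),
  ; which has no ISel pattern, so llc fails.
  ; With this patch, the rewrite is skipped and the generic form is kept.
  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
  ret void
}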
diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
index bc67471209bf8..ed625876869cc 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
@@ -1,6 +1,8 @@
; RUN: opt < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix=INFER
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s --check-prefix=PTX
; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %}
+; RUN: not llc -march=nvptx64 %s -o - 2>&1 | FileCheck %s --check-prefix=ERR
+; XFAIL: *
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"
@@ -11,7 +13,6 @@ target triple = "nvptx64-unknown-unknown"
define void @test_infer_const_from_cast() {
; INFER-LABEL: @test_infer_const_from_cast
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
-; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
; PTX-LABEL: .visible .func test_infer_const_from_cast(
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
@@ -69,7 +70,47 @@ entry:
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
- call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
+ ret void
+}
+
+; Kernel Function Test
+; Cast from Const space to Generic
+define ptx_kernel void @test_const_to_generic_cast_kernel(ptr addrspace(4) %const_ptr) {
+; INFER-LABEL: @test_const_to_generic_cast_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
+; PTX-LABEL: .visible .entry test_const_to_generic_cast_kernel(
+; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}];
+entry:
+ %cast = addrspacecast ptr addrspace(4) %const_ptr to ptr
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
+ ret void
+}
+
+; Kernel Function Test
+; Multiple casts in sequence
+define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
+; INFER-LABEL: @test_infer_through_multiple_casts_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
+; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
+; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
+; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
+; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
+entry:
+ %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
+ %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
+ %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
+ ret void
+}
+
+
+; Negative test case for global to generic addrspace cast
+define void @test_global_to_generic_cast(ptr addrspace(1) %global_ptr) {
+; ERR: unsupported prefetch address space cast
+entry:
+ %cast = addrspacecast ptr addrspace(1) %global_ptr to ptr
+ call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index a1c5ec8f50a6b..0b1f0fdd5e85f 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -121,4 +121,45 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
; CHECK-PTX64-NEXT: ret;
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
ret void
-}
\ No newline at end of file
+}
+
+define ptx_kernel void @prefetch_tensormap_kernel(ptr %ptr) {
+; CHECK-PTX64-LABEL: prefetch_tensormap_kernel(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1];
+; CHECK-PTX64-NEXT: ret;
+ tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
+ ret void
+}
+
+define ptx_kernel void @prefetch_const_tensormap_kernel(ptr addrspace(4) %const_ptr) {
+; CHECK-PTX64-LABEL: prefetch_const_tensormap_kernel(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_const_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT: prefetch.const.tensormap [%rd1];
+; CHECK-PTX64-NEXT: ret;
+ tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
+ ret void
+}
+
+define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
+; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
+; CHECK-PTX64: {
+; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT: // %bb.0:
+; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1];
+; CHECK-PTX64-NEXT: ret;
+ tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
+ ret void
+}
+
+
✅ With the latest revision this PR passed the C/C++ code formatter.
Thanks @abhilash1910 for the patch!
Thanks for the fix!
LGTM
@durga4github please help to merge, thanks.
LLVM Buildbot has detected a new failure on builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/163/builds/27024. Here is the relevant piece of the build log for reference.
Context: Highlighted from #156830, this is an ISel lowering issue in the NVPTX backend for the prefetch.tensormap intrinsic, caused by an unchecked pattern rewrite during the infer-address-spaces pass. The intrinsic is valid only for the const, param, and generic address spaces; any other address space is invalid. Currently, the intrinsic gets falsely rewritten to target AS(1) when its pointer argument arrives as an argument of a kernel function. So this patch adds a check for the correct address spaces before rewriting.

cc @durga4github

FYI: @Wolfram70 @rupprecht @castigli
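As a usage sketch (the file name, function name, and exact pipeline are illustrative, not taken from this PR), the failure mode above can be reproduced with a generic pointer arriving as a ptx_kernel argument, run through infer-address-spaces and then llc:

; repro.ll -- hypothetical reproducer; run, for example, with:
;   opt -S -passes=infer-address-spaces repro.ll | llc -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80
target triple = "nvptx64-unknown-unknown"

declare void @llvm.nvvm.prefetch.tensormap.p0(ptr)

; A generic pointer coming in as a kernel argument is what previously
; triggered the bogus rewrite to the unsupported AS(1) form.
define ptx_kernel void @repro(ptr %ptr) {
entry:
  tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
  ret void
}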