Skip to content

Conversation

abhilash1910
Copy link
Contributor

@abhilash1910 abhilash1910 commented Sep 17, 2025

Context: Highlighted from #156830 , this is an Isel lowering issue in
the NVPTX backend for prefetch.tensormap intrinsic.

It is caused by unchecked pattern rewrite during infer-address-space pass.
This intrinsic is valid only for const, param and generic address-spaces.
Any other address space is invalid. Currently, this intrinsic gets falsely
re-written to target AS(1), when the pointer-argument of the intrinsic
comes as an argument of a kernel function.

So, this patch adds a check on the correct address-spaces
before re-writing them.

cc @durga4github

FYI: @Wolfram70 @rupprecht @castigli

@llvmbot
Copy link
Member

llvmbot commented Sep 17, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Abhilash Majumder (abhilash1910)

Changes

Context: Highlighted from #156830 , there is an Isel lowering issue in NVPTX backend for prefetch.tensormap intrinsic which is caused due to unguarded pattern rewrite during infer address-space pass. it is observed that for ptx_kernel attributed kernel functions the rewriter for prefetch.tensormap intrinsics is not able to map to generic (addrspace 0) memory space as it implicitly gets allocated to global addresspace. This causes an Isel failure which is fixed by this PR. Should not involve any MLIR op changes as it is failing only at backend.

cc @durga4github

FYI: @Wolfram70 @rupprecht @castigli


Full diff: https://github.com/llvm/llvm-project/pull/159253.diff

3 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (+4-3)
  • (modified) llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll (+43-2)
  • (modified) llvm/test/CodeGen/NVPTX/prefetch.ll (+42-1)
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index f4f89613b358d..b920da0d04203 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -588,10 +588,11 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
       return ConstantInt::get(II->getType(), *R);
     return nullptr;
   }
-  case Intrinsic::nvvm_prefetch_tensormap: {
-    IRBuilder<> Builder(II);
-    return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
+  const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+    if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST || NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
+      return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
                                         NewV);
+    return nullptr;
   }
   }
   return nullptr;
diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
index bc67471209bf8..ed625876869cc 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
@@ -1,6 +1,8 @@
 ; RUN: opt < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix=INFER
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s --check-prefix=PTX
 ; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %}
+; RUN: not llc -march=nvptx64 %s -o - 2>&1 | FileCheck %s --check-prefix=ERR
+; XFAIL: *
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 target triple = "nvptx64-unknown-unknown"
@@ -11,7 +13,6 @@ target triple = "nvptx64-unknown-unknown"
 define void @test_infer_const_from_cast() {
 ; INFER-LABEL: @test_infer_const_from_cast
 ; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
-; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
 ; PTX-LABEL: .visible .func test_infer_const_from_cast(
 ; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
 ; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
@@ -69,7 +70,47 @@ entry:
   %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
   %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
   %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
-  call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
+  ret void
+}
+
+; Kernel Function Test
+; Cast from Const space to Generic
+define ptx_kernel void @test_const_to_generic_cast_kernel(ptr addrspace(4) %const_ptr) {
+; INFER-LABEL: @test_const_to_generic_cast_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
+; PTX-LABEL: .visible .entry test_const_to_generic_cast_kernel(
+; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}];
+entry:
+  %cast = addrspacecast ptr addrspace(4) %const_ptr to ptr
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
+  ret void
+}
+
+; Kernel Function Test
+; Multiple casts in sequence
+define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
+; INFER-LABEL: @test_infer_through_multiple_casts_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
+; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
+; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
+; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
+; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
+entry:
+  %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
+  %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
+  %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
+  ret void
+}
+
+
+; Negative test case for global to generic addrspace cast
+define void @test_global_to_generic_cast(ptr addrspace(1) %global_ptr) {
+; ERR: unsupported prefetch address space cast
+entry:
+  %cast = addrspacecast ptr addrspace(1) %global_ptr to ptr
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index a1c5ec8f50a6b..0b1f0fdd5e85f 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -121,4 +121,45 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
 ; CHECK-PTX64-NEXT:    ret;
   tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
   ret void
-}
\ No newline at end of file
+}
+
+define ptx_kernel void @prefetch_tensormap_kernel(ptr %ptr) {
+; CHECK-PTX64-LABEL: prefetch_tensormap_kernel(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [prefetch_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT:    prefetch.tensormap [%rd1];
+; CHECK-PTX64-NEXT:    ret;
+  tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
+  ret void
+}
+
+define ptx_kernel void @prefetch_const_tensormap_kernel(ptr addrspace(4) %const_ptr) {
+; CHECK-PTX64-LABEL: prefetch_const_tensormap_kernel(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [prefetch_const_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT:    prefetch.const.tensormap [%rd1];
+; CHECK-PTX64-NEXT:    ret;
+  tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
+  ret void
+}
+
+define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
+; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT:    prefetch.param.tensormap [%rd1];
+; CHECK-PTX64-NEXT:    ret;
+  tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
+  ret void
+}
+
+

Copy link

github-actions bot commented Sep 17, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@castigli
Copy link

Thanks @abhilash1910 for the patch!
I can confirm that this resolves the issue raised in #156830 (since there is no CI for sm_90!).

@Wolfram70
Copy link
Contributor

Thanks for the fix!

Copy link
Contributor

@durga4github durga4github left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@abhilash1910
Copy link
Contributor Author

@durga4github please help to merge, thanks.

@durga4github durga4github merged commit c1aa89e into llvm:main Sep 24, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 24, 2025

LLVM Buildbot has detected a new failure on builder cross-project-tests-sie-ubuntu-dwarf5 running on doug-worker-1b while building llvm at step 6 "test-build-unified-tree-check-cross-project".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/163/builds/27024

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-cross-project) failure: test (failure)
******************** TEST 'cross-project-tests :: debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp # RUN: at line 7
+ clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp
"/usr/bin/python3.10" "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py" test --fail-lt 1.0 -w -v --debugger lldb-dap --lldb-executable "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap" --dap-message-log=-e --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp | /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp # RUN: at line 8
+ /usr/bin/python3.10 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py test --fail-lt 1.0 -w -v --debugger lldb-dap --lldb-executable /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap --dap-message-log=-e --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp
+ /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp
note: Opening DAP server: /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap
-> {
  "type": "request",
  "command": "initialize",
  "arguments": {
    "clientID": "dexter",
    "adapterID": "lldb-dap",
    "pathFormat": "path",
    "linesStartAt1": true,
    "columnsStartAt1": true,
    "supportsVariableType": true,
    "supportsVariablePaging": true,
    "supportsRunInTerminalRequest": false
  },
  "seq": 1
}
<- {
  "body": {
    "$__lldb_version": "lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a)\n  clang revision c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a\n  llvm revision c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a",
    "completionTriggerCharacters": [
      ".",
      " ",
      "\t"
    ],
    "exceptionBreakpointFilters": [
      {
        "description": "C++ Catch",
        "filter": "cpp_catch",
        "label": "C++ Catch",
        "supportsCondition": true
      },
      {
        "description": "C++ Throw",
        "filter": "cpp_throw",
        "label": "C++ Throw",
        "supportsCondition": true
      },
      {
        "description": "Objective-C Catch",
        "filter": "objc_catch",
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants