[NVPTX] prefetch.tensormap pattern rewriter fix #159253

abhilash1910 · 2025-09-17T06:49:57Z

Context: Highlighted from #156830 , this is an Isel lowering issue in
the NVPTX backend for prefetch.tensormap intrinsic.

It is caused by unchecked pattern rewrite during infer-address-space pass.
This intrinsic is valid only for const, param and generic address-spaces.
Any other address space is invalid. Currently, this intrinsic gets falsely
re-written to target AS(1), when the pointer-argument of the intrinsic
comes as an argument of a kernel function.

So, this patch adds a check on the correct address-spaces
before re-writing them.

cc @durga4github

FYI: @Wolfram70 @rupprecht @castigli

llvmbot · 2025-09-17T06:50:28Z

@llvm/pr-subscribers-backend-nvptx

Author: Abhilash Majumder (abhilash1910)

Changes

Context: Highlighted from #156830 , there is an Isel lowering issue in NVPTX backend for prefetch.tensormap intrinsic which is caused due to unguarded pattern rewrite during infer address-space pass. it is observed that for ptx_kernel attributed kernel functions the rewriter for prefetch.tensormap intrinsics is not able to map to generic (addrspace 0) memory space as it implicitly gets allocated to global addresspace. This causes an Isel failure which is fixed by this PR. Should not involve any MLIR op changes as it is failing only at backend.

cc @durga4github

FYI: @Wolfram70 @rupprecht @castigli

Full diff: https://github.com/llvm/llvm-project/pull/159253.diff

3 Files Affected:

(modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (+4-3)
(modified) llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll (+43-2)
(modified) llvm/test/CodeGen/NVPTX/prefetch.ll (+42-1)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index f4f89613b358d..b920da0d04203 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -588,10 +588,11 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
       return ConstantInt::get(II->getType(), *R);
     return nullptr;
   }
-  case Intrinsic::nvvm_prefetch_tensormap: {
-    IRBuilder<> Builder(II);
-    return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
+  const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+    if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST || NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
+      return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
                                         NewV);
+    return nullptr;
   }
   }
   return nullptr;
diff --git a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
index bc67471209bf8..ed625876869cc 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll
@@ -1,6 +1,8 @@
 ; RUN: opt < %s -S -passes=infer-address-spaces | FileCheck %s --check-prefix=INFER
 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s --check-prefix=PTX
 ; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %}
+; RUN: not llc -march=nvptx64 %s -o - 2>&1 | FileCheck %s --check-prefix=ERR
+; XFAIL: *
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 target triple = "nvptx64-unknown-unknown"
@@ -11,7 +13,6 @@ target triple = "nvptx64-unknown-unknown"
 define void @test_infer_const_from_cast() {
 ; INFER-LABEL: @test_infer_const_from_cast
 ; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
-; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
 ; PTX-LABEL: .visible .func test_infer_const_from_cast(
 ; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
 ; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
@@ -69,7 +70,47 @@ entry:
   %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
   %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
   %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
-  call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
+  ret void
+}
+
+; Kernel Function Test
+; Cast from Const space to Generic
+define ptx_kernel void @test_const_to_generic_cast_kernel(ptr addrspace(4) %const_ptr) {
+; INFER-LABEL: @test_const_to_generic_cast_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
+; PTX-LABEL: .visible .entry test_const_to_generic_cast_kernel(
+; PTX: prefetch.const.tensormap [%rd{{[0-9]+}}];
+entry:
+  %cast = addrspacecast ptr addrspace(4) %const_ptr to ptr
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
+  ret void
+}
+
+; Kernel Function Test
+; Multiple casts in sequence
+define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
+; INFER-LABEL: @test_infer_through_multiple_casts_kernel
+; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
+; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
+; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
+; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
+; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
+entry:
+  %cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
+  %cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
+  %cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
+  ret void
+}
+
+
+; Negative test case for global to generic addrspace cast
+define void @test_global_to_generic_cast(ptr addrspace(1) %global_ptr) {
+; ERR: unsupported prefetch address space cast
+entry:
+  %cast = addrspacecast ptr addrspace(1) %global_ptr to ptr
+  call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/prefetch.ll b/llvm/test/CodeGen/NVPTX/prefetch.ll
index a1c5ec8f50a6b..0b1f0fdd5e85f 100644
--- a/llvm/test/CodeGen/NVPTX/prefetch.ll
+++ b/llvm/test/CodeGen/NVPTX/prefetch.ll
@@ -121,4 +121,45 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
 ; CHECK-PTX64-NEXT:    ret;
   tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
   ret void
-}
\ No newline at end of file
+}
+
+define ptx_kernel void @prefetch_tensormap_kernel(ptr %ptr) {
+; CHECK-PTX64-LABEL: prefetch_tensormap_kernel(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [prefetch_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT:    prefetch.tensormap [%rd1];
+; CHECK-PTX64-NEXT:    ret;
+  tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
+  ret void
+}
+
+define ptx_kernel void @prefetch_const_tensormap_kernel(ptr addrspace(4) %const_ptr) {
+; CHECK-PTX64-LABEL: prefetch_const_tensormap_kernel(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [prefetch_const_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT:    prefetch.const.tensormap [%rd1];
+; CHECK-PTX64-NEXT:    ret;
+  tail call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) %const_ptr)
+  ret void
+}
+
+define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
+; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
+; CHECK-PTX64-NEXT:    prefetch.param.tensormap [%rd1];
+; CHECK-PTX64-NEXT:    ret;
+  tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
+  ret void
+}
+
+

github-actions · 2025-09-17T06:54:28Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/test/CodeGen/NVPTX/prefetch.ll

llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll

llvm/test/CodeGen/NVPTX/prefetch.ll

castigli · 2025-09-17T13:44:42Z

Thanks @abhilash1910 for the patch!
I can confirm that this resolves the issue raised in #156830 (since there is no CI for sm_90!).

Wolfram70 · 2025-09-18T06:36:29Z

Thanks for the fix!

llvm/test/CodeGen/NVPTX/prefetch.ll

…const

durga4github

LGTM

abhilash1910 · 2025-09-24T09:56:13Z

@durga4github please help to merge, thanks.

llvm-ci · 2025-09-24T10:04:04Z

LLVM Buildbot has detected a new failure on builder cross-project-tests-sie-ubuntu-dwarf5 running on doug-worker-1b while building llvm at step 6 "test-build-unified-tree-check-cross-project".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/163/builds/27024

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-cross-project) failure: test (failure)
******************** TEST 'cross-project-tests :: debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp # RUN: at line 7
+ clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp
"/usr/bin/python3.10" "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py" test --fail-lt 1.0 -w -v --debugger lldb-dap --lldb-executable "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap" --dap-message-log=-e --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp | /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp # RUN: at line 8
+ /usr/bin/python3.10 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py test --fail-lt 1.0 -w -v --debugger lldb-dap --lldb-executable /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap --dap-message-log=-e --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/Output/expect_step_order.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp
+ /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp
note: Opening DAP server: /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap
-> {
  "type": "request",
  "command": "initialize",
  "arguments": {
    "clientID": "dexter",
    "adapterID": "lldb-dap",
    "pathFormat": "path",
    "linesStartAt1": true,
    "columnsStartAt1": true,
    "supportsVariableType": true,
    "supportsVariablePaging": true,
    "supportsRunInTerminalRequest": false
  },
  "seq": 1
}
<- {
  "body": {
    "$__lldb_version": "lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a)\n  clang revision c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a\n  llvm revision c1aa89ef9be2af6599a7a280fe6de646c2a5ca4a",
    "completionTriggerCharacters": [
      ".",
      " ",
      "\t"
    ],
    "exceptionBreakpointFilters": [
      {
        "description": "C++ Catch",
        "filter": "cpp_catch",
        "label": "C++ Catch",
        "supportsCondition": true
      },
      {
        "description": "C++ Throw",
        "filter": "cpp_throw",
        "label": "C++ Throw",
        "supportsCondition": true
      },
      {
        "description": "Objective-C Catch",
        "filter": "objc_catch",
...

abhilash1910 added 2 commits September 17, 2025 12:09

pattern rewriter fix

be0d3af

remove s

32bf60a

llvmbot added the backend:NVPTX label Sep 17, 2025

abhilash1910 mentioned this pull request Sep 17, 2025

[MLIR][NVVM][NVGPU] Combine prefetch and prefetch.tensormap #153134

Merged

typos and format

bcf7365

durga4github reviewed Sep 17, 2025

View reviewed changes

llvm/test/CodeGen/NVPTX/prefetch.ll Outdated Show resolved Hide resolved

durga4github reviewed Sep 17, 2025

View reviewed changes

llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll Outdated Show resolved Hide resolved

durga4github reviewed Sep 17, 2025

View reviewed changes

llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll Show resolved Hide resolved

durga4github requested a review from AlexMaclean September 17, 2025 08:36

durga4github reviewed Sep 17, 2025

View reviewed changes

llvm/test/CodeGen/NVPTX/prefetch.ll Show resolved Hide resolved

abhilash1910 added 4 commits September 20, 2025 14:34

refine tests

789cd0f

add gridconst

b7c6035

refresh

65fa576

format

b12b047

durga4github reviewed Sep 23, 2025

View reviewed changes

llvm/test/CodeGen/NVPTX/prefetch.ll Show resolved Hide resolved

constant pointers passed as entry function parameter cannot use cvta.…

33840a8

…const

durga4github approved these changes Sep 23, 2025

View reviewed changes

refresh

a6d16eb

durga4github merged commit c1aa89e into llvm:main Sep 24, 2025
9 checks passed

castigli mentioned this pull request Oct 2, 2025

[NVGPU] Fix nvdsl examples #156830

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[NVPTX] prefetch.tensormap pattern rewriter fix #159253

[NVPTX] prefetch.tensormap pattern rewriter fix #159253

Uh oh!

abhilash1910 commented Sep 17, 2025 •

edited by durga4github

Loading

Uh oh!

llvmbot commented Sep 17, 2025

Uh oh!

github-actions bot commented Sep 17, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

castigli commented Sep 17, 2025

Uh oh!

Wolfram70 commented Sep 18, 2025

Uh oh!

Uh oh!

durga4github left a comment

Uh oh!

abhilash1910 commented Sep 24, 2025

Uh oh!

Uh oh!

llvm-ci commented Sep 24, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

[NVPTX] prefetch.tensormap pattern rewriter fix #159253

[NVPTX] prefetch.tensormap pattern rewriter fix #159253

Uh oh!

Conversation

abhilash1910 commented Sep 17, 2025 • edited by durga4github Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 17, 2025

Uh oh!

github-actions bot commented Sep 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

castigli commented Sep 17, 2025

Uh oh!

Wolfram70 commented Sep 18, 2025

Uh oh!

Uh oh!

durga4github left a comment

Choose a reason for hiding this comment

Uh oh!

abhilash1910 commented Sep 24, 2025

Uh oh!

Uh oh!

llvm-ci commented Sep 24, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

abhilash1910 commented Sep 17, 2025 •

edited by durga4github

Loading

github-actions bot commented Sep 17, 2025 •

edited

Loading