Skip to content

Conversation

@AlexMaclean
Copy link
Member

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 15, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/153730.diff

2 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+16-9)
  • (added) llvm/test/CodeGen/NVPTX/cse-mov-sym.ll (+59)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index ebb5e32f5e6fc..18b16b350fa0c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1510,18 +1510,22 @@ let hasSideEffects = false in {
                                     "mov.b64 \t$d, __local_depot$num;">;
 }
 
-
-// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
-let hasSideEffects = false, isAsCheapAsAMove = true in {
-  let isMoveReg = true in
+let hasSideEffects = false in {
+  let isMoveReg = true, isAsCheapAsAMove = true in
     class MOVr<RegisterClass RC, string OpStr> :
       BasicNVPTXInst<(outs RC:$dst), (ins RC:$src), "mov." # OpStr>;
 
-  let isMoveImm = true in
+  let isMoveImm = true, isAsCheapAsAMove = true in
     class MOVi<RegTyInfo t, string suffix> :
       BasicNVPTXInst<(outs t.RC:$dst), (ins t.Imm:$src),
               "mov." # suffix,
               [(set t.Ty:$dst, t.ImmNode:$src)]>;
+
+  // We don't want to set isAsCheapAsAMove to true for these instructions as
+  // this would prevent CSE and resulted in regressions.
+  class MovSymInst<RegTyInfo t> :
+    BasicNVPTXInst<(outs t.RC:$dst), (ins Operand<t.Ty>:$src),
+                   "mov.b" # t.Size>;
 }
 
 def MOV_B1_r : MOVr<B1, "pred">;
@@ -1539,6 +1543,9 @@ def MOV_BF16_i : MOVi<BF16RT, "b16">;
 def MOV_F32_i  : MOVi<F32RT, "b32">;
 def MOV_F64_i  : MOVi<F64RT, "b64">;
 
+def MOV_B32_sym : MovSymInst<I32RT>;
+def MOV_B64_sym : MovSymInst<I64RT>;
+
 
 def to_tglobaladdr : SDNodeXForm<globaladdr, [{
   return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N),
@@ -1555,11 +1562,11 @@ def to_tframeindex : SDNodeXForm<frameindex, [{
   return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
 }]>;
 
-def : Pat<(i32 globaladdr:$dst), (MOV_B32_i (to_tglobaladdr $dst))>;
-def : Pat<(i64 globaladdr:$dst), (MOV_B64_i (to_tglobaladdr $dst))>;
+def : Pat<(i32 globaladdr:$dst), (MOV_B32_sym (to_tglobaladdr $dst))>;
+def : Pat<(i64 globaladdr:$dst), (MOV_B64_sym (to_tglobaladdr $dst))>;
 
-def : Pat<(i32 externalsym:$dst), (MOV_B32_i (to_texternsym $dst))>;
-def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
+def : Pat<(i32 externalsym:$dst), (MOV_B32_sym (to_texternsym $dst))>;
+def : Pat<(i64 externalsym:$dst), (MOV_B64_sym (to_texternsym $dst))>;
 
 //---- Copy Frame Index ----
 def LEA_ADDRi :   NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
diff --git a/llvm/test/CodeGen/NVPTX/cse-mov-sym.ll b/llvm/test/CodeGen/NVPTX/cse-mov-sym.ll
new file mode 100644
index 0000000000000..31ecce5a66b64
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/cse-mov-sym.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+@global_smem = external addrspace(3) global [0 x i8], align 16
+
+
+;; Confirm the mov.b64 of global_smem is CSE'd. We need to make things a bit
+;; complex with a loop to make this interesting.
+define i32 @test_mov_sym(i32 %offset1, i32 %offset2, i1 %cond) {
+; CHECK-LABEL: test_mov_sym(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-NEXT:    .reg .b32 %r<8>;
+; CHECK-NEXT:    .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    ld.param.b8 %rs1, [test_mov_sym_param_2];
+; CHECK-NEXT:    and.b16 %rs2, %rs1, 1;
+; CHECK-NEXT:    setp.ne.b16 %p1, %rs2, 0;
+; CHECK-NEXT:    ld.param.b32 %r4, [test_mov_sym_param_0];
+; CHECK-NEXT:    cvt.s64.s32 %rd1, %r4;
+; CHECK-NEXT:    mov.b64 %rd2, global_smem;
+; CHECK-NEXT:    add.s64 %rd3, %rd2, %rd1;
+; CHECK-NEXT:    ld.shared.b32 %r7, [%rd3];
+; CHECK-NEXT:    not.pred %p2, %p1;
+; CHECK-NEXT:    @%p2 bra $L__BB0_4;
+; CHECK-NEXT:  // %bb.1: // %if1.preheader
+; CHECK-NEXT:    ld.param.b32 %r5, [test_mov_sym_param_1];
+; CHECK-NEXT:    setp.ne.b32 %p3, %r4, %r5;
+; CHECK-NEXT:  $L__BB0_2: // %if1
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    @%p3 bra $L__BB0_2;
+; CHECK-NEXT:  // %bb.3: // %if2
+; CHECK-NEXT:    cvt.s64.s32 %rd4, %r5;
+; CHECK-NEXT:    add.s64 %rd6, %rd2, %rd4;
+; CHECK-NEXT:    ld.shared.b32 %r6, [%rd6];
+; CHECK-NEXT:    add.s32 %r7, %r7, %r6;
+; CHECK-NEXT:  $L__BB0_4: // %end
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
+; CHECK-NEXT:    ret;
+entry:
+    %gep = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset1
+    %val = load i32, ptr addrspace(3) %gep
+    br i1 %cond, label %if1, label %end
+if1:
+    %cond2 = icmp eq i32 %offset1, %offset2
+    br i1 %cond2, label %if2, label %if1
+if2:
+    %gep2 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset2
+    %val2 = load i32, ptr addrspace(3) %gep2
+    %add = add i32 %val, %val2
+    br label %end
+end:
+    %ret = phi i32 [ %add, %if2 ], [ %val, %entry ]
+    ret i32 %ret
+}

Copy link
Contributor

@ThomasRaoux ThomasRaoux left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

[(set t.Ty:$dst, t.ImmNode:$src)]>;

// We don't want to set isAsCheapAsAMove to true for these instructions as
// this would prevent CSE and resulted in regressions.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd mention the thread discussing the context of the issue.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added!

@AlexMaclean AlexMaclean merged commit bc77363 into llvm:main Aug 15, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 15, 2025

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/22683

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: lang/c/local_variables/TestLocalVariables.py (756 of 2309)
PASS: lldb-api :: lang/c/offsetof/TestOffsetof.py (757 of 2309)
PASS: lldb-api :: lang/c/non-mangled/TestCNonMangled.py (758 of 2309)
XFAIL: lldb-api :: lang/c/modules/TestCModules.py (759 of 2309)
PASS: lldb-api :: lang/c/record_decl_in_expr/TestRecordDeclInExpr.py (760 of 2309)
PASS: lldb-api :: lang/c/parray_vrs_char_array/TestParrayVrsCharArrayChild.py (761 of 2309)
PASS: lldb-api :: lang/c/sizeof/TestCSizeof.py (762 of 2309)
PASS: lldb-api :: lang/c/register_variables/TestRegisterVariables.py (763 of 2309)
PASS: lldb-api :: lang/c/set_values/TestSetValues.py (764 of 2309)
UNRESOLVED: lldb-api :: functionalities/statusline/TestStatusline.py (765 of 2309)
******************** TEST 'lldb-api :: functionalities/statusline/TestStatusline.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --cmake-build-type Release /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/functionalities/statusline -p TestStatusline.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision bc773632355b3cebde350b0341624e88be40b744)
  clang revision bc773632355b3cebde350b0341624e88be40b744
  llvm revision bc773632355b3cebde350b0341624e88be40b744
Skipping the following test categories: ['libc++', 'msvcstl', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test (TestStatusline.TestStatusline)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_deadlock (TestStatusline.TestStatusline)
lldb-server exiting...
FAIL: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_modulelist_deadlock (TestStatusline.TestStatusline)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_no_color (TestStatusline.TestStatusline)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_no_target (TestStatusline.TestStatusline)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_resize (TestStatusline.TestStatusline)
======================================================================
ERROR: test_modulelist_deadlock (TestStatusline.TestStatusline)
   Regression test for a deadlock that occurs when the status line is enabled before connecting
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/functionalities/statusline/TestStatusline.py", line 199, in test_modulelist_deadlock
    self.expect(
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/lldbpexpect.py", line 95, in expect
    self.expect_prompt()
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/lldbpexpect.py", line 19, in expect_prompt
    self.child.expect_exact(self.PROMPT)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/spawnbase.py", line 432, in expect_exact
    return exp.expect_loop(timeout)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 179, in expect_loop
    return self.eof(e)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants