[NVPTX] Fixup EXT_LOAD lowering for i128 values #138049

AlexMaclean · 2025-04-30T23:14:17Z

Ensure that when custom lowering a vector load/store to a multi-output load/store node we confirm that the memory value type matches the type used by the node. Also add some asserts for basic sanity checking of load size.

Fixes #138034

llvmbot · 2025-04-30T23:14:50Z

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/138049.diff

3 Files Affected:

(modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+13)
(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+15-12)
(added) llvm/test/CodeGen/NVPTX/i128-ld-st.ll (+28)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 295ed666a1902..57f35a827d9cf 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MathExtras.h"
 #include <optional>
 
 using namespace llvm;
@@ -1141,6 +1142,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
   else
     FromType = getLdStRegType(ScalarVT);
 
+  assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
+         FromTypeWidth <= 128 && "Invalid width for load");
+
   // Create the machine instruction DAG
   SDValue Offset, Base;
   SelectADDR(N->getOperand(1), Base, Offset);
@@ -1236,6 +1240,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
     FromType = NVPTX::PTXLdStInstCode::Untyped;
   }
 
+  assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
+         FromTypeWidth <= 128 && TotalWidth <= 128 && "Invalid width for load");
+
   SDValue Offset, Base;
   SelectADDR(N->getOperand(1), Base, Offset);
   SDValue Ops[] = {getI32Imm(Ordering, DL),
@@ -1453,6 +1460,9 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
   // Create the machine instruction DAG
   SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
 
+  assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 &&
+         ToTypeWidth <= 128 && "Invalid width for store");
+
   SDValue Offset, Base;
   SelectADDR(ST->getBasePtr(), Base, Offset);
 
@@ -1537,6 +1547,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
     ToType = NVPTX::PTXLdStInstCode::Untyped;
   }
 
+  assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 &&
+         ToTypeWidth <= 128 && TotalWidth <= 128 && "Invalid width for store");
+
   SDValue Offset, Base;
   SelectADDR(N2, Base, Offset);
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c41741ed10232..2f24667cb3cde 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3191,20 +3191,22 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue
 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
-  SDNode *N = Op.getNode();
+  MemSDNode *N = cast<MemSDNode>(Op.getNode());
   SDValue Val = N->getOperand(1);
   SDLoc DL(N);
-  EVT ValVT = Val.getValueType();
+  const EVT ValVT = Val.getValueType();
+  const EVT MemVT = N->getMemoryVT();
+  if (ValVT != MemVT)
+    return SDValue();
 
   const auto NumEltsAndEltVT = getVectorLoweringShape(ValVT);
   if (!NumEltsAndEltVT)
     return SDValue();
   const auto [NumElts, EltVT] = NumEltsAndEltVT.value();
 
-  MemSDNode *MemSD = cast<MemSDNode>(N);
   const DataLayout &TD = DAG.getDataLayout();
 
-  Align Alignment = MemSD->getAlign();
+  Align Alignment = N->getAlign();
   Align PrefAlign = TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
   if (Alignment < PrefAlign) {
     // This store is not sufficiently aligned, so bail out and let this vector
@@ -3267,7 +3269,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue NewSt =
       DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
-                              MemSD->getMemoryVT(), MemSD->getMemOperand());
+                              N->getMemoryVT(), N->getMemOperand());
 
   // return DCI.CombineTo(N, NewSt, true);
   return NewSt;
@@ -5762,20 +5764,20 @@ static void ReplaceBITCAST(SDNode *Node, SelectionDAG &DAG,
 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &Results) {
-  const EVT ResVT = N->getValueType(0);
-  SDLoc DL(N);
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  const EVT ResVT = LD->getValueType(0);
+  const EVT MemVT = LD->getMemoryVT();
+  if (ResVT != MemVT)
+    return;
 
   const auto NumEltsAndEltVT = getVectorLoweringShape(ResVT);
   if (!NumEltsAndEltVT)
     return;
   const auto [NumElts, EltVT] = NumEltsAndEltVT.value();
 
-  LoadSDNode *LD = cast<LoadSDNode>(N);
-
   Align Alignment = LD->getAlign();
   const auto &TD = DAG.getDataLayout();
-  Align PrefAlign =
-      TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+  Align PrefAlign = TD.getPrefTypeAlign(MemVT.getTypeForEVT(*DAG.getContext()));
   if (Alignment < PrefAlign) {
     // This load is not sufficiently aligned, so bail out and let this vector
     // load be scalarized.  Note that we may still be able to emit smaller
@@ -5806,9 +5808,10 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
     break;
   }
   }
+  SDLoc DL(LD);
 
   // Copy regular operands
-  SmallVector<SDValue, 8> OtherOps(N->ops());
+  SmallVector<SDValue, 8> OtherOps(LD->ops());
 
   // The select routine does not have access to the LoadSDNode instance, so
   // pass along the extension information
diff --git a/llvm/test/CodeGen/NVPTX/i128-ld-st.ll b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll
new file mode 100644
index 0000000000000..f3d04f133a8d4
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O0 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -O0 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i128 @foo() {
+; CHECK-LABEL: foo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    bra.uni $L__BB0_1;
+; CHECK-NEXT:  $L__BB0_1: // %while.cond
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov.b64 %rd1, 0;
+; CHECK-NEXT:    ld.u8 %rd2, [%rd1];
+; CHECK-NEXT:    st.v2.u64 [%rd1], {%rd2, %rd1};
+; CHECK-NEXT:    bra.uni $L__BB0_1;
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %0 = load i8, ptr null, align 1
+  %conv = zext i8 %0 to i128
+  store i128 %conv, ptr null, align 16
+  br label %while.cond
+}

github-actions · 2025-04-30T23:16:38Z

✅ With the latest revision this PR passed the C/C++ code formatter.

jhuber6

Thanks, looks straightforward enough to me but someone more familiar with the NVPTX backend might know more.

Artem-B

LGTM, with minor nits.
Thank you for the quick fix!

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Artem-B · 2025-04-30T23:44:31Z

llvm/test/CodeGen/NVPTX/i128-ld-st.ll

+; CHECK-NEXT:    ld.u8 %rd2, [%rd1];
+; CHECK-NEXT:    st.v2.u64 [%rd1], {%rd2, %rd1};
+; CHECK-NEXT:    bra.uni $L__BB0_1;
+entry:


the test case could be cleaned up a bit, dropping the loop and using valid pointers, so there are no questions of whether the input is valid.

E.g. https://godbolt.org/z/EsvbY3dav

llvm-ci · 2025-05-01T04:04:46Z

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/11125

Here is the relevant piece of the build log for the reference

Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp   -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************

Ensure that when custom lowering a vector load/store to a multi-output load/store node we confirm that the memory value type matches the type used by the node. Also add some asserts for basic sanity checking of load size. Fixes llvm#138034

AlexMaclean requested review from Artem-B, jhuber6 and justinfargnoli April 30, 2025 23:14

AlexMaclean self-assigned this Apr 30, 2025

llvmbot added the backend:NVPTX label Apr 30, 2025

[NVPTX] Fixup EXT_LOAD lowering for i128 values

3e4b9c3

AlexMaclean force-pushed the dev/amaclean/upstream/quick-fix-i128 branch from 403c693 to 3e4b9c3 Compare April 30, 2025 23:17

jhuber6 approved these changes Apr 30, 2025

View reviewed changes

Artem-B approved these changes Apr 30, 2025

View reviewed changes

address comments

2be73ec

Artem-B approved these changes May 1, 2025

View reviewed changes

AlexMaclean merged commit 6efdcc1 into llvm:main May 1, 2025
11 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[NVPTX] Fixup EXT_LOAD lowering for i128 values #138049

[NVPTX] Fixup EXT_LOAD lowering for i128 values #138049

Uh oh!

AlexMaclean commented Apr 30, 2025 •

edited

Loading

Uh oh!

llvmbot commented Apr 30, 2025

Uh oh!

github-actions bot commented Apr 30, 2025 •

edited

Loading

Uh oh!

jhuber6 left a comment

Uh oh!

Artem-B left a comment

Uh oh!

Uh oh!

Uh oh!

Artem-B Apr 30, 2025

Uh oh!

Uh oh!

llvm-ci commented May 1, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

[NVPTX] Fixup EXT_LOAD lowering for i128 values #138049

[NVPTX] Fixup EXT_LOAD lowering for i128 values #138049

Uh oh!

Conversation

AlexMaclean commented Apr 30, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Apr 30, 2025

Uh oh!

github-actions bot commented Apr 30, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jhuber6 left a comment

Choose a reason for hiding this comment

Uh oh!

Artem-B left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Artem-B Apr 30, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented May 1, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

AlexMaclean commented Apr 30, 2025 •

edited

Loading

github-actions bot commented Apr 30, 2025 •

edited

Loading