[NVPTX] Fixup EXT_LOAD lowering for i128 values #138049
Conversation
@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/138049.diff

3 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 295ed666a1902..57f35a827d9cf 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MathExtras.h"
#include <optional>
using namespace llvm;
@@ -1141,6 +1142,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
else
FromType = getLdStRegType(ScalarVT);
+ assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
+ FromTypeWidth <= 128 && "Invalid width for load");
+
// Create the machine instruction DAG
SDValue Offset, Base;
SelectADDR(N->getOperand(1), Base, Offset);
@@ -1236,6 +1240,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
FromType = NVPTX::PTXLdStInstCode::Untyped;
}
+ assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
+ FromTypeWidth <= 128 && TotalWidth <= 128 && "Invalid width for load");
+
SDValue Offset, Base;
SelectADDR(N->getOperand(1), Base, Offset);
SDValue Ops[] = {getI32Imm(Ordering, DL),
@@ -1453,6 +1460,9 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
// Create the machine instruction DAG
SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
+ assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 &&
+ ToTypeWidth <= 128 && "Invalid width for store");
+
SDValue Offset, Base;
SelectADDR(ST->getBasePtr(), Base, Offset);
@@ -1537,6 +1547,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ToType = NVPTX::PTXLdStInstCode::Untyped;
}
+ assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 &&
+ ToTypeWidth <= 128 && TotalWidth <= 128 && "Invalid width for store");
+
SDValue Offset, Base;
SelectADDR(N2, Base, Offset);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c41741ed10232..2f24667cb3cde 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3191,20 +3191,22 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
- SDNode *N = Op.getNode();
+ MemSDNode *N = cast<MemSDNode>(Op.getNode());
SDValue Val = N->getOperand(1);
SDLoc DL(N);
- EVT ValVT = Val.getValueType();
+ const EVT ValVT = Val.getValueType();
+ const EVT MemVT = N->getMemoryVT();
+ if (ValVT != MemVT)
+ return SDValue();
const auto NumEltsAndEltVT = getVectorLoweringShape(ValVT);
if (!NumEltsAndEltVT)
return SDValue();
const auto [NumElts, EltVT] = NumEltsAndEltVT.value();
- MemSDNode *MemSD = cast<MemSDNode>(N);
const DataLayout &TD = DAG.getDataLayout();
- Align Alignment = MemSD->getAlign();
+ Align Alignment = N->getAlign();
Align PrefAlign = TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
if (Alignment < PrefAlign) {
// This store is not sufficiently aligned, so bail out and let this vector
@@ -3267,7 +3269,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
SDValue NewSt =
DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
- MemSD->getMemoryVT(), MemSD->getMemOperand());
+ N->getMemoryVT(), N->getMemOperand());
// return DCI.CombineTo(N, NewSt, true);
return NewSt;
@@ -5762,20 +5764,20 @@ static void ReplaceBITCAST(SDNode *Node, SelectionDAG &DAG,
/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
- const EVT ResVT = N->getValueType(0);
- SDLoc DL(N);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ const EVT ResVT = LD->getValueType(0);
+ const EVT MemVT = LD->getMemoryVT();
+ if (ResVT != MemVT)
+ return;
const auto NumEltsAndEltVT = getVectorLoweringShape(ResVT);
if (!NumEltsAndEltVT)
return;
const auto [NumElts, EltVT] = NumEltsAndEltVT.value();
- LoadSDNode *LD = cast<LoadSDNode>(N);
-
Align Alignment = LD->getAlign();
const auto &TD = DAG.getDataLayout();
- Align PrefAlign =
- TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
+ Align PrefAlign = TD.getPrefTypeAlign(MemVT.getTypeForEVT(*DAG.getContext()));
if (Alignment < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
@@ -5806,9 +5808,10 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
break;
}
}
+ SDLoc DL(LD);
// Copy regular operands
- SmallVector<SDValue, 8> OtherOps(N->ops());
+ SmallVector<SDValue, 8> OtherOps(LD->ops());
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
diff --git a/llvm/test/CodeGen/NVPTX/i128-ld-st.ll b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll
new file mode 100644
index 0000000000000..f3d04f133a8d4
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O0 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -O0 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i128 @foo() {
+; CHECK-LABEL: foo(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: bra.uni $L__BB0_1;
+; CHECK-NEXT: $L__BB0_1: // %while.cond
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: mov.b64 %rd1, 0;
+; CHECK-NEXT: ld.u8 %rd2, [%rd1];
+; CHECK-NEXT: st.v2.u64 [%rd1], {%rd2, %rd1};
+; CHECK-NEXT: bra.uni $L__BB0_1;
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %0 = load i8, ptr null, align 1
+ %conv = zext i8 %0 to i128
+ store i128 %conv, ptr null, align 16
+ br label %while.cond
+}
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from 403c693 to 3e4b9c3.
jhuber6 left a comment:
Thanks, looks straightforward enough to me but someone more familiar with the NVPTX backend might know more.
Artem-B left a comment:
LGTM, with minor nits.
Thank you for the quick fix!
; CHECK-NEXT: ld.u8 %rd2, [%rd1];
; CHECK-NEXT: st.v2.u64 [%rd1], {%rd2, %rd1};
; CHECK-NEXT: bra.uni $L__BB0_1;
entry:
the test case could be cleaned up a bit, dropping the loop and using valid pointers, so there are no questions of whether the input is valid.
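For reference, a minimal sketch of what such a cleaned-up test body might look like, assuming pointer arguments in place of the null pointers and no loop (the function and argument names here are illustrative, not the committed test, and the CHECK lines would be regenerated with update_llc_test_checks.py):

target triple = "nvptx64-nvidia-cuda"

; Load an i8, zero-extend it to i128, and store the result. The extending
; load's memory type (i8) differs from its result type (i128), which is the
; mismatch the new MemVT checks bail out on.
define void @ext_load_store_i128(ptr %src, ptr %dst) {
entry:
  %v = load i8, ptr %src, align 1
  %ext = zext i8 %v to i128
  store i128 %ext, ptr %dst, align 16
  ret void
}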
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/11125
Ensure that when custom lowering a vector load/store to a multi-output load/store node we confirm that the memory value type matches the type used by the node. Also add some asserts for basic sanity checking of load size.
Fixes #138034