Skip to content

Commit 2be73ec

Browse files
committed
address comments
1 parent 3e4b9c3 commit 2be73ec

File tree

2 files changed

+20
-18
lines changed

2 files changed

+20
-18
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3196,6 +3196,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
31963196
SDLoc DL(N);
31973197
const EVT ValVT = Val.getValueType();
31983198
const EVT MemVT = N->getMemoryVT();
3199+
3200+
// If we're truncating as part of the store, avoid lowering to a StoreV node.
3201+
// TODO: consider relaxing this restriction.
31993202
if (ValVT != MemVT)
32003203
return SDValue();
32013204

@@ -5767,6 +5770,9 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
57675770
LoadSDNode *LD = cast<LoadSDNode>(N);
57685771
const EVT ResVT = LD->getValueType(0);
57695772
const EVT MemVT = LD->getMemoryVT();
5773+
5774+
// If we're doing sign/zero extension as part of the load, avoid lowering to
5775+
// a LoadV node. TODO: consider relaxing this restriction.
57705776
if (ResVT != MemVT)
57715777
return;
57725778

llvm/test/CodeGen/NVPTX/i128-ld-st.ll

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,21 @@
44

55
target triple = "nvptx64-nvidia-cuda"
66

7-
define i128 @foo() {
7+
define i128 @foo(ptr %p, ptr %o) {
88
; CHECK-LABEL: foo(
99
; CHECK: {
10-
; CHECK-NEXT: .reg .b64 %rd<3>;
10+
; CHECK-NEXT: .reg .b64 %rd<5>;
1111
; CHECK-EMPTY:
12-
; CHECK-NEXT: // %bb.0: // %entry
13-
; CHECK-NEXT: bra.uni $L__BB0_1;
14-
; CHECK-NEXT: $L__BB0_1: // %while.cond
15-
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
16-
; CHECK-NEXT: mov.b64 %rd1, 0;
17-
; CHECK-NEXT: ld.u8 %rd2, [%rd1];
18-
; CHECK-NEXT: st.v2.u64 [%rd1], {%rd2, %rd1};
19-
; CHECK-NEXT: bra.uni $L__BB0_1;
20-
entry:
21-
br label %while.cond
22-
23-
while.cond: ; preds = %while.cond, %entry
24-
%0 = load i8, ptr null, align 1
25-
%conv = zext i8 %0 to i128
26-
store i128 %conv, ptr null, align 16
27-
br label %while.cond
12+
; CHECK-NEXT: // %bb.0:
13+
; CHECK-NEXT: ld.param.u64 %rd2, [foo_param_1];
14+
; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
15+
; CHECK-NEXT: ld.u8 %rd3, [%rd1];
16+
; CHECK-NEXT: mov.b64 %rd4, 0;
17+
; CHECK-NEXT: st.v2.u64 [%rd2], {%rd3, %rd4};
18+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4};
19+
; CHECK-NEXT: ret;
20+
%c = load i8, ptr %p, align 1
21+
%i = zext i8 %c to i128
22+
store i128 %i, ptr %o, align 16
23+
ret i128 %i
2824
}

0 commit comments

Comments
 (0)