Skip to content

Commit 868b4c1

Browse files
committed
process i128 array with custom ComputePTXValueVTs
1 parent 00b50c9 commit 868b4c1

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,17 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
198198
return;
199199
}
200200

201+
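// Arrays are expanded element by element: recurse into ComputePTXValueVTs (not the generic ComputeValueVTs below) for each element, placing element i at StartingOffset + i * alloc-size(element type).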
202+
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
203+
Type *EltTy = ATy->getElementType();
204+
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
205+
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
206+
ComputePTXValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
207+
StartingOffset + i * EltSize);
208+
}
209+
return;
210+
}
211+
201212
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
202213
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
203214
EVT VT = TempVTs[i];
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
3+
4+
; Test function: returns a [2 x i128] whose element 0 is %a sign-extended from
; i64 and whose element 1 is %b sign-extended from i32. The autogenerated
; assertions below expect each i128 element to be written to the return-value
; parameter with one st.param.v2.b64, the second one at byte offset 16.
define [2 x i128] @foo(i64 %a, i32 %b) {
5+
; CHECK-LABEL: foo(
6+
; CHECK: {
7+
; CHECK-NEXT: .reg .b32 %r<2>;
8+
; CHECK-NEXT: .reg .b64 %rd<5>;
9+
; CHECK-EMPTY:
10+
; CHECK-NEXT: // %bb.0:
11+
; CHECK-NEXT: ld.param.u32 %r1, [foo_param_1];
12+
; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
13+
; CHECK-NEXT: shr.s64 %rd2, %rd1, 63;
14+
; CHECK-NEXT: cvt.s64.s32 %rd3, %r1;
15+
; CHECK-NEXT: shr.s64 %rd4, %rd3, 63;
16+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
17+
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
18+
; CHECK-NEXT: ret;
19+
; Widen both arguments to i128, then fill the array one element at a time,
; starting from an undef aggregate.
%1 = sext i64 %a to i128
20+
%2 = sext i32 %b to i128
21+
%3 = insertvalue [ 2 x i128 ] undef, i128 %1, 0
22+
%4 = insertvalue [ 2 x i128 ] %3, i128 %2, 1
23+
24+
ret [ 2 x i128 ] %4
25+
}

0 commit comments

Comments
 (0)