Skip to content

Commit 1379740

Browse files
authored
[NVPTX] Fix segfault with i128 types in arrays (#120562)
- Process i128 arrays with the custom ComputePTXValueVTs. The i128 elements are handled in the recursion and split into i64 types.
- Add corresponding tests.
1 parent 87f4240 commit 1379740

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,15 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
261261
return;
262262
}
263263

264+
// Given an array type, recursively traverse the elements with custom ComputePTXValueVTs.
265+
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
266+
Type *EltTy = ATy->getElementType();
267+
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
268+
for (int I : llvm::seq<int>(ATy->getNumElements()))
269+
ComputePTXValueVTs(TLI, DL, EltTy, ValueVTs, Offsets, StartingOffset + I * EltSize);
270+
return;
271+
}
272+
264273
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
265274
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
266275
EVT VT = TempVTs[i];

llvm/test/CodeGen/NVPTX/i128-array.ll

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
3+
4+
; Test: return a [2 x i128] array built from scalar arguments.
; %a (i64) and %b (i32) are sign-extended to i128 and inserted as elements 0
; and 1 of the returned array. The CHECK lines expect each i128 element to be
; written to func_retval0 with a single st.param.v2.b64 (a pair of 64-bit
; registers), at byte offsets 0 and 16.
define [2 x i128] @foo(i64 %a, i32 %b) {
5+
; CHECK-LABEL: foo(
6+
; CHECK: {
7+
; CHECK-NEXT: .reg .b32 %r<2>;
8+
; CHECK-NEXT: .reg .b64 %rd<5>;
9+
; CHECK-EMPTY:
10+
; CHECK-NEXT: // %bb.0:
11+
; CHECK-NEXT: ld.param.u32 %r1, [foo_param_1];
12+
; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
13+
; CHECK-NEXT: shr.s64 %rd2, %rd1, 63;
14+
; CHECK-NEXT: cvt.s64.s32 %rd3, %r1;
15+
; CHECK-NEXT: shr.s64 %rd4, %rd3, 63;
16+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
17+
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
18+
; CHECK-NEXT: ret;
19+
%1 = sext i64 %a to i128
20+
%2 = sext i32 %b to i128
21+
%3 = insertvalue [2 x i128] undef, i128 %1, 0
22+
%4 = insertvalue [2 x i128] %3, i128 %2, 1
23+
24+
ret [2 x i128] %4
25+
}
26+
27+
; Test: [2 x i128] passed through a byval pointer parameter and returned.
; Both i128 elements are loaded through getelementptr on %a and re-inserted
; into the returned array. The CHECK lines expect four scalar ld.param.u64
; loads from foo2_param_0 (byte offsets 0, 8, 16 and 24) followed by two
; st.param.v2.b64 stores to func_retval0 at byte offsets 0 and 16.
define [2 x i128] @foo2(ptr byval([2 x i128]) %a) {
28+
; CHECK-LABEL: foo2(
29+
; CHECK: {
30+
; CHECK-NEXT: .reg .b64 %rd<6>;
31+
; CHECK-EMPTY:
32+
; CHECK-NEXT: // %bb.0:
33+
; CHECK-NEXT: mov.b64 %rd1, foo2_param_0;
34+
; CHECK-NEXT: ld.param.u64 %rd2, [foo2_param_0+8];
35+
; CHECK-NEXT: ld.param.u64 %rd3, [foo2_param_0];
36+
; CHECK-NEXT: ld.param.u64 %rd4, [foo2_param_0+24];
37+
; CHECK-NEXT: ld.param.u64 %rd5, [foo2_param_0+16];
38+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2};
39+
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd5, %rd4};
40+
; CHECK-NEXT: ret;
41+
%ptr0 = getelementptr [2 x i128], ptr %a, i64 0, i32 0
42+
%1 = load i128, i128* %ptr0
43+
%ptr1 = getelementptr [2 x i128], ptr %a, i64 0, i32 1
44+
%2 = load i128, i128* %ptr1
45+
%3 = insertvalue [2 x i128] undef, i128 %1, 0
46+
%4 = insertvalue [2 x i128] %3, i128 %2, 1
47+
48+
ret [2 x i128] %4
49+
}
50+
51+
; Test: [2 x i128] passed directly by value and returned.
; The two elements are extracted from %a and re-inserted into the returned
; array. The CHECK lines expect one ld.param.v2.u64 per i128 element from
; foo3_param_0 (byte offsets 0 and 16) and one st.param.v2.b64 per element to
; func_retval0 at the same offsets.
define [2 x i128] @foo3([2 x i128] %a) {
52+
; CHECK-LABEL: foo3(
53+
; CHECK: {
54+
; CHECK-NEXT: .reg .b64 %rd<5>;
55+
; CHECK-EMPTY:
56+
; CHECK-NEXT: // %bb.0:
57+
; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [foo3_param_0+16];
58+
; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [foo3_param_0];
59+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
60+
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
61+
; CHECK-NEXT: ret;
62+
%1 = extractvalue [2 x i128] %a, 0
63+
%2 = extractvalue [2 x i128] %a, 1
64+
%3 = insertvalue [2 x i128] undef, i128 %1, 0
65+
%4 = insertvalue [2 x i128] %3, i128 %2, 1
66+
67+
ret [2 x i128] %4
68+
}

0 commit comments

Comments
 (0)