NVPTX: compute PTX value VTs for array types element by element.

ComputePTXValueVTs now recurses into each element of an ArrayType, advancing
the offset by the element's alloc size, and returns before reaching the
generic ComputeValueVTs call. The new llc test covers [2 x i128] as a return
value, as a byval pointer argument, and as a direct argument.

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 5c1f717694a4c..208d724f7ae28 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -261,6 +261,15 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
     return;
   }
 
+  // Given an array type, recursively traverse the elements with custom ComputePTXValueVTs.
+  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+    for (uint64_t I : llvm::seq(ATy->getNumElements()))
+      ComputePTXValueVTs(TLI, DL, EltTy, ValueVTs, Offsets, StartingOffset + I * EltSize);
+    return;
+  }
+
   ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
   for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
     EVT VT = TempVTs[i];
diff --git a/llvm/test/CodeGen/NVPTX/i128-array.ll b/llvm/test/CodeGen/NVPTX/i128-array.ll
new file mode 100644
index 0000000000000..348df8dcc7373
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128-array.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+define [2 x i128] @foo(i64 %a, i32 %b) {
+; CHECK-LABEL: foo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [foo_param_1];
+; CHECK-NEXT:    ld.param.u64 %rd1, [foo_param_0];
+; CHECK-NEXT:    shr.s64 %rd2, %rd1, 63;
+; CHECK-NEXT:    cvt.s64.s32 %rd3, %r1;
+; CHECK-NEXT:    shr.s64 %rd4, %rd3, 63;
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
+; CHECK-NEXT:    ret;
+  %1 = sext i64 %a to i128
+  %2 = sext i32 %b to i128
+  %3 = insertvalue [2 x i128] undef, i128 %1, 0
+  %4 = insertvalue [2 x i128] %3, i128 %2, 1
+
+  ret [2 x i128] %4
+}
+
+define [2 x i128] @foo2(ptr byval([2 x i128]) %a) {
+; CHECK-LABEL: foo2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b64 %rd1, foo2_param_0;
+; CHECK-NEXT:    ld.param.u64 %rd2, [foo2_param_0+8];
+; CHECK-NEXT:    ld.param.u64 %rd3, [foo2_param_0];
+; CHECK-NEXT:    ld.param.u64 %rd4, [foo2_param_0+24];
+; CHECK-NEXT:    ld.param.u64 %rd5, [foo2_param_0+16];
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd3, %rd2};
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0+16], {%rd5, %rd4};
+; CHECK-NEXT:    ret;
+  %ptr0 = getelementptr [2 x i128], ptr %a, i64 0, i32 0
+  %1 = load i128, i128* %ptr0
+  %ptr1 = getelementptr [2 x i128], ptr %a, i64 0, i32 1
+  %2 = load i128, i128* %ptr1
+  %3 = insertvalue [2 x i128] undef, i128 %1, 0
+  %4 = insertvalue [2 x i128] %3, i128 %2, 1
+
+  ret [2 x i128] %4
+}
+
+define [2 x i128] @foo3([2 x i128] %a) {
+; CHECK-LABEL: foo3(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [foo3_param_0+16];
+; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [foo3_param_0];
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT:    st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
+; CHECK-NEXT:    ret;
+  %1 = extractvalue [2 x i128] %a, 0
+  %2 = extractvalue [2 x i128] %a, 1
+  %3 = insertvalue [2 x i128] undef, i128 %1, 0
+  %4 = insertvalue [2 x i128] %3, i128 %2, 1
+
+  ret [2 x i128] %4
+}