Skip to content

Commit 35bcad6

Browse files
committed
[NVPTX] Coalesce kernel argument copies
1 parent fc56a3c commit 35bcad6

File tree

4 files changed

+83
-49
lines changed

4 files changed

+83
-49
lines changed

llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -623,15 +623,28 @@ void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
623623
Value *ArgInParam = new AddrSpaceCastInst(
624624
Arg, PointerType::get(Arg->getContext(), ADDRESS_SPACE_PARAM),
625625
Arg->getName(), FirstInst);
626+
// Create an opaque type of same size as StructType but without padding
627+
// holes as this could have been a union.
628+
const auto StructBytes = *AllocA->getAllocationSize(DL);
629+
SmallVector<Type *, 5> ChunkTypes;
630+
if (StructBytes >= 16) {
631+
Type *IntType = Type::getInt64Ty(Func->getContext());
632+
Type *ChunkType = VectorType::get(IntType, 2, false);
633+
Type *OpaqueType = StructBytes < 32 ? ChunkType :
634+
ArrayType::get(ChunkType, StructBytes / 16);
635+
ChunkTypes.push_back(OpaqueType);
636+
}
637+
for (const auto ChunkBytes: {8, 4, 2, 1}) {
638+
if (StructBytes & ChunkBytes) {
639+
Type *ChunkType = Type::getIntNTy(Func->getContext(), 8 * ChunkBytes);
640+
ChunkTypes.push_back(ChunkType);
641+
}
642+
}
643+
Type * OpaqueType = ChunkTypes.size() == 1 ? ChunkTypes[0] :
644+
StructType::create(ChunkTypes);
626645
// Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
627646
// addrspacecast preserves alignment. Since params are constant, this load
628647
// is definitely not volatile.
629-
const auto StructBytes = *AllocA->getAllocationSize(DL);
630-
const auto ChunkBytes = (StructBytes % 8 == 0) ? 8 :
631-
(StructBytes % 4 == 0) ? 4 :
632-
(StructBytes % 2 == 0) ? 2 : 1;
633-
Type *ChunkType = Type::getIntNTy(Func->getContext(), 8 * ChunkBytes);
634-
Type *OpaqueType = ArrayType::get(ChunkType, StructBytes / ChunkBytes);
635648
LoadInst *LI =
636649
new LoadInst(OpaqueType, ArgInParam, Arg->getName(),
637650
/*isVolatile=*/false, AllocA->getAlign(), FirstInst);

llvm/test/CodeGen/NVPTX/lower-args.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,15 @@
77
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
88
target triple = "nvptx64-nvidia-cuda"
99

10+
; IR: [[OPAQUE_OUTER:.*]] = type { <2 x i64>, i64 }
1011
%class.outer = type <{ %class.inner, i32, [4 x i8] }>
1112
%class.inner = type { ptr, ptr }
1213

1314
; Check that nvptx-lower-args preserves arg alignment
1415
; COMMON-LABEL: load_alignment
1516
define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %arg) {
1617
entry:
17-
; IR: load [3 x i64], ptr addrspace(101)
18+
; IR: load [[OPAQUE_OUTER]], ptr addrspace(101)
1819
; IR-SAME: align 8
1920
; PTX: ld.param.u64
2021
; PTX-NOT: ld.param.u8

llvm/test/CodeGen/NVPTX/lower-byval-args.ll

Lines changed: 54 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ source_filename = "<stdin>"
55
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
66
target triple = "nvptx64-nvidia-cuda"
77

8+
; COMMON: [[OPAQUE_C:.*]] = type { [2 x <2 x i64>], i64, i32, i8 }
89
%struct.S = type { i32, i32 }
910

1011
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
@@ -88,8 +89,8 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out,
8889
; COMMON-NEXT: [[ENTRY:.*:]]
8990
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
9091
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
91-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
92-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
92+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
93+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
9394
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
9495
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
9596
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -115,8 +116,8 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound
115116
; COMMON-NEXT: [[ENTRY:.*:]]
116117
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
117118
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
118-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
119-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
119+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
120+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
120121
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
121122
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
122123
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]])
@@ -134,8 +135,8 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n
134135
; COMMON-NEXT: [[ENTRY:.*:]]
135136
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
136137
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
137-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
138-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
138+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
139+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
139140
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
140141
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
141142
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -155,8 +156,8 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt
155156
; COMMON-NEXT: [[ENTRY:.*:]]
156157
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
157158
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
158-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
159-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
159+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
160+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
160161
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
161162
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
162163
; COMMON-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8
@@ -174,8 +175,8 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out
174175
; COMMON-NEXT: [[ENTRY:.*:]]
175176
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
176177
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
177-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
178-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
178+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
179+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
179180
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
180181
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
181182
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -195,8 +196,8 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr
195196
; COMMON-NEXT: [[ENTRY:.*:]]
196197
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
197198
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
198-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
199-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
199+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
200+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
200201
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
201202
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
202203
; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S3]] to i64
@@ -232,8 +233,8 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n
232233
; COMMON-NEXT: [[ENTRY:.*:]]
233234
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
234235
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
235-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
236-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
236+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
237+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
237238
; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
238239
; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
239240
; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
@@ -251,8 +252,8 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc
251252
; COMMON-NEXT: [[BB:.*:]]
252253
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
253254
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
254-
; COMMON-NEXT: [[S5:%.*]] = load [1 x i64], ptr addrspace(101) [[S4]], align 4
255-
; COMMON-NEXT: store [1 x i64] [[S5]], ptr [[S3]], align 4
255+
; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
256+
; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
256257
; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
257258
; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
258259
; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN2]], align 4
@@ -273,12 +274,12 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2,
273274
; SM_60-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
274275
; SM_60-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
275276
; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
276-
; SM_60-NEXT: [[INPUT26:%.*]] = load [1 x i32], ptr addrspace(101) [[INPUT25]], align 4
277-
; SM_60-NEXT: store [1 x i32] [[INPUT26]], ptr [[INPUT24]], align 4
277+
; SM_60-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
278+
; SM_60-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
278279
; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
279280
; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
280-
; SM_60-NEXT: [[INPUT13:%.*]] = load [1 x i32], ptr addrspace(101) [[INPUT12]], align 4
281-
; SM_60-NEXT: store [1 x i32] [[INPUT13]], ptr [[INPUT11]], align 4
281+
; SM_60-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
282+
; SM_60-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
282283
; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
283284
; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
284285
; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT8]], align 4
@@ -313,12 +314,12 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
313314
; COMMON-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
314315
; COMMON-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
315316
; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
316-
; COMMON-NEXT: [[INPUT26:%.*]] = load [1 x i32], ptr addrspace(101) [[INPUT25]], align 4
317-
; COMMON-NEXT: store [1 x i32] [[INPUT26]], ptr [[INPUT24]], align 4
317+
; COMMON-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
318+
; COMMON-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
318319
; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
319320
; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
320-
; COMMON-NEXT: [[INPUT13:%.*]] = load [1 x i32], ptr addrspace(101) [[INPUT12]], align 4
321-
; COMMON-NEXT: store [1 x i32] [[INPUT13]], ptr [[INPUT11]], align 4
321+
; COMMON-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
322+
; COMMON-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
322323
; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
323324
; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4
324325
; COMMON-NEXT: ret void
@@ -337,12 +338,12 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S)
337338
; SM_60-NEXT: [[INOUT8:%.*]] = addrspacecast ptr addrspace(1) [[INOUT7]] to ptr
338339
; SM_60-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
339340
; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
340-
; SM_60-NEXT: [[INPUT26:%.*]] = load [1 x i64], ptr addrspace(101) [[INPUT25]], align 8
341-
; SM_60-NEXT: store [1 x i64] [[INPUT26]], ptr [[INPUT24]], align 8
341+
; SM_60-NEXT: [[INPUT26:%.*]] = load i64, ptr addrspace(101) [[INPUT25]], align 8
342+
; SM_60-NEXT: store i64 [[INPUT26]], ptr [[INPUT24]], align 8
342343
; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
343344
; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
344-
; SM_60-NEXT: [[INPUT13:%.*]] = load [1 x i64], ptr addrspace(101) [[INPUT12]], align 4
345-
; SM_60-NEXT: store [1 x i64] [[INPUT13]], ptr [[INPUT11]], align 4
345+
; SM_60-NEXT: [[INPUT13:%.*]] = load i64, ptr addrspace(101) [[INPUT12]], align 4
346+
; SM_60-NEXT: store i64 [[INPUT13]], ptr [[INPUT11]], align 4
346347
; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
347348
; SM_60: [[FIRST]]:
348349
; SM_60-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
@@ -402,12 +403,12 @@ define void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%str
402403
; COMMON-NEXT: [[BB:.*:]]
403404
; COMMON-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
404405
; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
405-
; COMMON-NEXT: [[INPUT26:%.*]] = load [1 x i64], ptr addrspace(101) [[INPUT25]], align 8
406-
; COMMON-NEXT: store [1 x i64] [[INPUT26]], ptr [[INPUT24]], align 8
406+
; COMMON-NEXT: [[INPUT26:%.*]] = load i64, ptr addrspace(101) [[INPUT25]], align 8
407+
; COMMON-NEXT: store i64 [[INPUT26]], ptr [[INPUT24]], align 8
407408
; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
408409
; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
409-
; COMMON-NEXT: [[INPUT13:%.*]] = load [1 x i64], ptr addrspace(101) [[INPUT12]], align 4
410-
; COMMON-NEXT: store [1 x i64] [[INPUT13]], ptr [[INPUT11]], align 4
410+
; COMMON-NEXT: [[INPUT13:%.*]] = load i64, ptr addrspace(101) [[INPUT12]], align 4
411+
; COMMON-NEXT: store i64 [[INPUT13]], ptr [[INPUT11]], align 4
411412
; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
412413
; COMMON: [[FIRST]]:
413414
; COMMON-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
@@ -447,8 +448,27 @@ define dso_local void @padding(ptr nocapture noundef readnone %out, ptr noundef
447448
; COMMON-NEXT: [[ENTRY:.*:]]
448449
; COMMON-NEXT: [[S1:%.*]] = alloca [[UNION_U]], align 4
449450
; COMMON-NEXT: [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
450-
; COMMON-NEXT: [[S3:%.*]] = load [1 x i64], ptr addrspace(101) [[S2]], align 4
451-
; COMMON-NEXT: store [1 x i64] [[S3]], ptr [[S1]], align 4
451+
; COMMON-NEXT: [[S3:%.*]] = load i64, ptr addrspace(101) [[S2]], align 4
452+
; COMMON-NEXT: store i64 [[S3]], ptr [[S1]], align 4
453+
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]])
454+
; COMMON-NEXT: ret void
455+
;
456+
entry:
457+
call void @_Z6escapePv(ptr noundef nonnull %s) #0
458+
ret void
459+
}
460+
461+
%struct.C = type { [45 x i8] }
462+
463+
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
464+
define dso_local void @coalescing(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.C) align 4 %s) local_unnamed_addr #0 {
465+
; COMMON-LABEL: define dso_local void @coalescing(
466+
; COMMON-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_C:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
467+
; COMMON-NEXT: [[ENTRY:.*:]]
468+
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_C]], align 4
469+
; COMMON-NEXT: [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
470+
; COMMON-NEXT: [[S3:%.*]] = load [[OPAQUE_C]], ptr addrspace(101) [[S2]], align 4
471+
; COMMON-NEXT: store [[OPAQUE_C]] [[S3]], ptr [[S1]], align 4
452472
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]])
453473
; COMMON-NEXT: ret void
454474
;

llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,14 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
1717
; CHECK-NEXT: cvta.local.u32 %SP, %SPL;
1818
; CHECK-NEXT: ld.param.u32 %r1, [caller_St8x4_param_1];
1919
; CHECK-NEXT: add.u32 %r3, %SPL, 0;
20-
; CHECK-NEXT: ld.param.u64 %rd1, [caller_St8x4_param_0+24];
21-
; CHECK-NEXT: ld.param.u64 %rd2, [caller_St8x4_param_0+16];
22-
; CHECK-NEXT: ld.param.u64 %rd3, [caller_St8x4_param_0+8];
23-
; CHECK-NEXT: ld.param.u64 %rd4, [caller_St8x4_param_0];
24-
; CHECK-NEXT: st.local.u64 [%r3], %rd4;
25-
; CHECK-NEXT: st.local.u64 [%r3+8], %rd3;
26-
; CHECK-NEXT: st.local.u64 [%r3+16], %rd2;
27-
; CHECK-NEXT: st.local.u64 [%r3+24], %rd1;
20+
; CHECK-NEXT: ld.param.u64 %rd1, [caller_St8x4_param_0+16];
21+
; CHECK-NEXT: ld.param.u64 %rd2, [caller_St8x4_param_0+24];
22+
; CHECK-NEXT: ld.param.u64 %rd3, [caller_St8x4_param_0];
23+
; CHECK-NEXT: ld.param.u64 %rd4, [caller_St8x4_param_0+8];
24+
; CHECK-NEXT: st.local.u64 [%r3+8], %rd4;
25+
; CHECK-NEXT: st.local.u64 [%r3], %rd3;
26+
; CHECK-NEXT: st.local.u64 [%r3+24], %rd2;
27+
; CHECK-NEXT: st.local.u64 [%r3+16], %rd1;
2828
; CHECK-NEXT: ld.u64 %rd5, [%SP+8];
2929
; CHECK-NEXT: ld.u64 %rd6, [%SP+0];
3030
; CHECK-NEXT: ld.u64 %rd7, [%SP+24];

0 commit comments

Comments
 (0)