@@ -5,7 +5,6 @@ source_filename = "<stdin>"
55target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
66target triple = "nvptx64-nvidia-cuda"
77
8- ; COMMON: [[OPAQUE_C:.*]] = type { [2 x <2 x i64>], i64, i32, i8 }
98%struct.S = type { i32 , i32 }
109
1110; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
@@ -89,8 +88,7 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out,
8988; COMMON-NEXT: [[ENTRY:.*:]]
9089; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
9190; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
92- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
93- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
91+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
9492; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
9593; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
9694; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -116,8 +114,7 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound
116114; COMMON-NEXT: [[ENTRY:.*:]]
117115; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
118116; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
119- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
120- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
117+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
121118; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
122119; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
123120; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]])
@@ -135,8 +132,7 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n
135132; COMMON-NEXT: [[ENTRY:.*:]]
136133; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
137134; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
138- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
139- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
135+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
140136; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
141137; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
142138; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -156,8 +152,7 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt
156152; COMMON-NEXT: [[ENTRY:.*:]]
157153; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
158154; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
159- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
160- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
155+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
161156; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
162157; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
163158; COMMON-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8
@@ -175,8 +170,7 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out
175170; COMMON-NEXT: [[ENTRY:.*:]]
176171; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
177172; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
178- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
179- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
173+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
180174; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
181175; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
182176; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -196,8 +190,7 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr
196190; COMMON-NEXT: [[ENTRY:.*:]]
197191; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
198192; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
199- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
200- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
193+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
201194; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
202195; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
203196; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S3]] to i64
@@ -233,8 +226,7 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n
233226; COMMON-NEXT: [[ENTRY:.*:]]
234227; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
235228; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
236- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
237- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
229+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
238230; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
239231; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
240232; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
@@ -252,8 +244,7 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc
252244; COMMON-NEXT: [[BB:.*:]]
253245; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
254246; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
255- ; COMMON-NEXT: [[S5:%.*]] = load i64, ptr addrspace(101) [[S4]], align 4
256- ; COMMON-NEXT: store i64 [[S5]], ptr [[S3]], align 4
247+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
257248; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
258249; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
259250; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN2]], align 4
@@ -274,12 +265,10 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2,
274265; SM_60-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
275266; SM_60-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
276267; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
277- ; SM_60-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
278- ; SM_60-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
268+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
279269; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
280270; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
281- ; SM_60-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
282- ; SM_60-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
271+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
283272; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
284273; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
285274; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT8]], align 4
@@ -314,12 +303,10 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
314303; COMMON-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
315304; COMMON-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
316305; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
317- ; COMMON-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
318- ; COMMON-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
306+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
319307; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
320308; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
321- ; COMMON-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
322- ; COMMON-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
309+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
323310; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
324311; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4
325312; COMMON-NEXT: ret void
@@ -338,12 +325,10 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S)
338325; SM_60-NEXT: [[INOUT8:%.*]] = addrspacecast ptr addrspace(1) [[INOUT7]] to ptr
339326; SM_60-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
340327; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
341- ; SM_60-NEXT: [[INPUT26:%.*]] = load i64, ptr addrspace(101) [[INPUT25]], align 8
342- ; SM_60-NEXT: store i64 [[INPUT26]], ptr [[INPUT24]], align 8
328+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
343329; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
344330; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
345- ; SM_60-NEXT: [[INPUT13:%.*]] = load i64, ptr addrspace(101) [[INPUT12]], align 4
346- ; SM_60-NEXT: store i64 [[INPUT13]], ptr [[INPUT11]], align 4
331+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
347332; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
348333; SM_60: [[FIRST]]:
349334; SM_60-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
@@ -403,12 +388,10 @@ define void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%str
403388; COMMON-NEXT: [[BB:.*:]]
404389; COMMON-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
405390; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
406- ; COMMON-NEXT: [[INPUT26:%.*]] = load i64, ptr addrspace(101) [[INPUT25]], align 8
407- ; COMMON-NEXT: store i64 [[INPUT26]], ptr [[INPUT24]], align 8
391+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
408392; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
409393; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
410- ; COMMON-NEXT: [[INPUT13:%.*]] = load i64, ptr addrspace(101) [[INPUT12]], align 4
411- ; COMMON-NEXT: store i64 [[INPUT13]], ptr [[INPUT11]], align 4
394+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
412395; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
413396; COMMON: [[FIRST]]:
414397; COMMON-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
@@ -438,45 +421,6 @@ merge: ; preds = %second, %first
438421 ret void
439422}
440423
441- %union.U = type { %struct.P }
442- %struct.P = type { i8 , i32 }
443-
444- ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
445- define dso_local void @padding (ptr nocapture noundef readnone %out , ptr noundef byval (%union.U ) align 4 %s ) local_unnamed_addr #0 {
446- ; COMMON-LABEL: define dso_local void @padding(
447- ; COMMON-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[UNION_U:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
448- ; COMMON-NEXT: [[ENTRY:.*:]]
449- ; COMMON-NEXT: [[S1:%.*]] = alloca [[UNION_U]], align 4
450- ; COMMON-NEXT: [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
451- ; COMMON-NEXT: [[S3:%.*]] = load i64, ptr addrspace(101) [[S2]], align 4
452- ; COMMON-NEXT: store i64 [[S3]], ptr [[S1]], align 4
453- ; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]])
454- ; COMMON-NEXT: ret void
455- ;
456- entry:
457- call void @_Z6escapePv (ptr noundef nonnull %s ) #0
458- ret void
459- }
460-
461- %struct.C = type { [45 x i8 ] }
462-
463- ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
464- define dso_local void @coalescing (ptr nocapture noundef readnone %out , ptr noundef byval (%struct.C ) align 4 %s ) local_unnamed_addr #0 {
465- ; COMMON-LABEL: define dso_local void @coalescing(
466- ; COMMON-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_C:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
467- ; COMMON-NEXT: [[ENTRY:.*:]]
468- ; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_C]], align 4
469- ; COMMON-NEXT: [[S2:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
470- ; COMMON-NEXT: [[S3:%.*]] = load [[OPAQUE_C]], ptr addrspace(101) [[S2]], align 4
471- ; COMMON-NEXT: store [[OPAQUE_C]] [[S3]], ptr [[S1]], align 4
472- ; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]])
473- ; COMMON-NEXT: ret void
474- ;
475- entry:
476- call void @_Z6escapePv (ptr noundef nonnull %s ) #0
477- ret void
478- }
479-
480424attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math" ="true" "target-cpu" ="sm_60" "target-features" ="+ptx78,+sm_60" "uniform-work-group-size" ="true" }
481425attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
482426attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
0 commit comments