@@ -88,8 +88,7 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out,
8888; COMMON-NEXT: [[ENTRY:.*:]]
8989; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
9090; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
91- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
92- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
91+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
9392; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
9493; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
9594; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -115,8 +114,7 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound
115114; COMMON-NEXT: [[ENTRY:.*:]]
116115; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
117116; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
118- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
119- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
117+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
120118; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
121119; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
122120; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]])
@@ -134,8 +132,7 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n
134132; COMMON-NEXT: [[ENTRY:.*:]]
135133; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
136134; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
137- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
138- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
135+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
139136; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
140137; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
141138; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -155,8 +152,7 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt
155152; COMMON-NEXT: [[ENTRY:.*:]]
156153; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
157154; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
158- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
159- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
155+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
160156; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
161157; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
162158; COMMON-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8
@@ -174,8 +170,7 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out
174170; COMMON-NEXT: [[ENTRY:.*:]]
175171; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
176172; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
177- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
178- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
173+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
179174; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
180175; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
181176; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -195,8 +190,7 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr
195190; COMMON-NEXT: [[ENTRY:.*:]]
196191; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
197192; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
198- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
199- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
193+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
200194; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
201195; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
202196; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S3]] to i64
@@ -232,8 +226,7 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n
232226; COMMON-NEXT: [[ENTRY:.*:]]
233227; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
234228; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
235- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
236- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
229+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
237230; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
238231; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
239232; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
@@ -251,8 +244,7 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc
251244; COMMON-NEXT: [[BB:.*:]]
252245; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
253246; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
254- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
255- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
247+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
256248; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
257249; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
258250; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN2]], align 4
@@ -273,12 +265,10 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2,
273265; SM_60-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
274266; SM_60-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
275267; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
276- ; SM_60-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
277- ; SM_60-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
268+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
278269; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
279270; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
280- ; SM_60-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
281- ; SM_60-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
271+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
282272; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
283273; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
284274; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT8]], align 4
@@ -313,12 +303,10 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
313303; COMMON-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
314304; COMMON-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
315305; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
316- ; COMMON-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
317- ; COMMON-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
306+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
318307; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
319308; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
320- ; COMMON-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
321- ; COMMON-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
309+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
322310; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
323311; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4
324312; COMMON-NEXT: ret void
@@ -337,12 +325,10 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S)
337325; SM_60-NEXT: [[INOUT8:%.*]] = addrspacecast ptr addrspace(1) [[INOUT7]] to ptr
338326; SM_60-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
339327; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
340- ; SM_60-NEXT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8
341- ; SM_60-NEXT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4
328+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
342329; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
343330; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
344- ; SM_60-NEXT: [[INPUT13:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT12]], align 4
345- ; SM_60-NEXT: store [[STRUCT_S]] [[INPUT13]], ptr [[INPUT11]], align 4
331+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
346332; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
347333; SM_60: [[FIRST]]:
348334; SM_60-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
@@ -402,12 +388,10 @@ define void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%str
402388; COMMON-NEXT: [[BB:.*:]]
403389; COMMON-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
404390; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
405- ; COMMON-NEXT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8
406- ; COMMON-NEXT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4
391+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
407392; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
408393; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
409- ; COMMON-NEXT: [[INPUT13:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT12]], align 4
410- ; COMMON-NEXT: store [[STRUCT_S]] [[INPUT13]], ptr [[INPUT11]], align 4
394+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
411395; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
412396; COMMON: [[FIRST]]:
413397; COMMON-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
0 commit comments