diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index bab0e21067eea..7377b5bcbc347 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -1,9 +1,10 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=ALL,X86 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=X86 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN20 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -check-prefixes=SPIR %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -check-prefixes=AMDGCN30-GVAR %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN30 %s typedef int int2 __attribute__((ext_vector_type(2))); @@ -45,147 +46,1236 @@ struct LargeStructTwoMember { struct LargeStructOneMember g_s; #endif -// X86-LABEL: define{{.*}} void @foo(ptr dead_on_unwind noalias writable sret(%struct.Mat4X4) align 4 %agg.result, ptr noundef byval(%struct.Mat3X3) align 4 %in) -// AMDGCN-LABEL: define{{.*}} %struct.Mat4X4 @foo([9 x i32] %in.coerce) +// +// X86-LABEL: define void @foo( +// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// SPIR-LABEL: define dso_local spir_func void @foo( +// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN30-GVAR-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN30-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN30-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { Mat4X4 out; return out; } -// ALL-LABEL: define {{.*}} void @ker -// Expect two mem copies: one for the argument "in", and one for -// the return value. -// X86: call void @llvm.memcpy.p0.p1.i32(ptr -// X86: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) - -// AMDGCN: load [9 x i32], ptr addrspace(1) -// AMDGCN: call %struct.Mat4X4 @foo([9 x i32] -// AMDGCN: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) +// +// X86-LABEL: define spir_kernel void @ker( +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 +// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 +// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) +// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] +// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @ker( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) +// SPIR-NEXT: call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] +// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN30-GVAR-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN30-GVAR-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN30-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN30-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN30-NEXT: ret void +// kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { out[0] = foo(in[1]); } -// X86-LABEL: define{{.*}} void @foo_large(ptr dead_on_unwind noalias writable sret(%struct.Mat64X64) align 4 %agg.result, ptr noundef byval(%struct.Mat32X32) align 4 %in) -// AMDGCN-LABEL: define{{.*}} void @foo_large(ptr addrspace(5) dead_on_unwind noalias writable sret(%struct.Mat64X64) align 4 %agg.result, ptr addrspace(5) noundef byref(%struct.Mat32X32) align 4 %{{.*}} -// AMDGCN: %in = alloca %struct.Mat32X32, align 4, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 %in, ptr addrspace(5) align 4 %{{.*}}, i64 4096, i1 false) +// +// X86-LABEL: define void @foo_large( +// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local void @foo_large( +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local void @foo_large( +// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_func void @foo_large( +// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local void @foo_large( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local void @foo_large( +// AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN30-NEXT: ret void +// Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { Mat64X64 out; return out; } -// ALL-LABEL: define {{.*}} void @ker_large -// Expect two mem copies: one for the argument "in", and one for -// the return value. -// X86: call void @llvm.memcpy.p0.p1.i32(ptr -// X86: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) -// AMDGCN: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) -// AMDGCN: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) +// +// X86-LABEL: define spir_kernel void @ker_large( +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 +// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 +// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) +// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @ker_large( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) +// SPIR-NEXT: call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN30-GVAR-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN30-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN30-NEXT: ret void +// kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { out[0] = foo_large(in[1]); } -// AMDGCN-LABEL: define{{.*}} void @FuncOneMember(<2 x i32> %u.coerce) +// +// X86-LABEL: define void @FuncOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8 +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local void @FuncOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local void @FuncOneMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_func void @FuncOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local void @FuncOneMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN30-NEXT: ret void +// void FuncOneMember(struct StructOneMember u) { u.x = (int2)(0, 0); } -// AMDGCN-LABEL: define{{.*}} void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %{{.*}} -// AMDGCN: %u = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 %u, ptr addrspace(5) align 8 %{{.*}}, i64 800, i1 false) -// AMDGCN-NOT: addrspacecast -// AMDGCN: store <2 x i32> %{{.*}}, ptr addrspace(5) +// +// X86-LABEL: define void @FuncOneLargeMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-NEXT: ret void +// void FuncOneLargeMember(struct LargeStructOneMember u) { u.x[0] = (int2)(0, 0); } -// AMDGCN20-LABEL: define{{.*}} void @test_indirect_arg_globl() -// AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN20: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 %[[byval_temp]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) -// AMDGCN20: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables)) +// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl( +// AMDGCN20-SAME: ) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_globl( +// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// void test_indirect_arg_globl(void) { FuncOneLargeMember(g_s); } #endif -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @test_indirect_arg_local() -// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 %[[byval_temp]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) +// +// X86-LABEL: define spir_kernel void @test_indirect_arg_local( +// X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local( +// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// kernel void test_indirect_arg_local(void) { local struct LargeStructOneMember l_s; FuncOneLargeMember(l_s); } -// AMDGCN-LABEL: define{{.*}} void @test_indirect_arg_private() -// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN-NOT: @llvm.memcpy -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[p_s]]) +// +// X86-LABEL: define void @test_indirect_arg_private( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN20-SAME: ) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private( +// SPIR-SAME: ) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN30-SAME: ) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// void test_indirect_arg_private(void) { struct LargeStructOneMember p_s; FuncOneLargeMember(p_s); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelOneMember -// AMDGCN-SAME: (<2 x i32> %[[u_coerce:.*]]) -// AMDGCN: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5) -// AMDGCN: %[[coerce_dive:.*]] = getelementptr inbounds nuw %struct.StructOneMember, ptr addrspace(5) %[[u]], i32 0, i32 0 -// AMDGCN: store <2 x i32> %[[u_coerce]], ptr addrspace(5) %[[coerce_dive]] -// AMDGCN: call void @FuncOneMember(<2 x i32> +// +// X86-LABEL: define spir_kernel void @KernelOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// kernel void KernelOneMember(struct StructOneMember u) { FuncOneMember(u); } -// SPIR: call void @llvm.memcpy.p0.p1.i32 -// SPIR-NOT: addrspacecast +// +// X86-LABEL: define spir_kernel void @KernelOneMemberSpir( +// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4 +// X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir( +// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) +// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// kernel void KernelOneMemberSpir(global struct StructOneMember* u) { FuncOneMember(*u); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN: %[[U_ELEM:.*]] = getelementptr inbounds nuw %struct.LargeStructOneMember, ptr addrspace(5) %[[U]], i32 0, i32 0 -// AMDGCN: %[[EXTRACT:.*]] = extractvalue %struct.LargeStructOneMember %u.coerce, 0 -// AMDGCN: store [100 x <2 x i32>] %[[EXTRACT]], ptr addrspace(5) %[[U_ELEM]], align 8 -// AMDGCN: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[U]]) +// +// X86-LABEL: define spir_kernel void @KernelLargeOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// kernel void KernelLargeOneMember(struct LargeStructOneMember u) { FuncOneLargeMember(u); } -// AMDGCN-LABEL: define{{.*}} void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1) +// +// X86-LABEL: define void @FuncTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8 +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_func void @FuncTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[Y]], align 8 +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN30-NEXT: ret void +// void FuncTwoMember(struct StructTwoMember u) { u.y = (int2)(0, 0); } -// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember -// AMDGCN-SAME: (ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) -// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5) -// AMDGCN: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 %[[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// +// X86-LABEL: define void @FuncLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-NEXT: ret void +// void FuncLargeTwoMember(struct LargeStructTwoMember u) { u.y[0] = (int2)(0, 0); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelTwoMember -// AMDGCN-SAME: (%struct.StructTwoMember %[[u_coerce:.*]]) -// AMDGCN: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5) -// AMDGCN: %[[LD0:.*]] = load <2 x i32>, ptr addrspace(5) -// AMDGCN: %[[LD1:.*]] = load <2 x i32>, ptr addrspace(5) -// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]]) +// +// X86-LABEL: define spir_kernel void @KernelTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// kernel void KernelTwoMember(struct StructTwoMember u) { FuncTwoMember(u); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeTwoMember -// AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]]) -// AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5) -// AMDGCN: %[[U_PTR0:.*]] = getelementptr inbounds nuw %struct.LargeStructTwoMember, ptr addrspace(5) %[[u]], i32 0, i32 0 -// AMDGCN: %[[EXTRACT0:.*]] = extractvalue %struct.LargeStructTwoMember %u.coerce, 0 -// AMDGCN: store [40 x <2 x i32>] %[[EXTRACT0]], ptr addrspace(5) %[[U_PTR0]] -// AMDGCN: %[[U_PTR1:.*]] = getelementptr inbounds nuw %struct.LargeStructTwoMember, ptr addrspace(5) %[[u]], i32 0, i32 1 -// AMDGCN: %[[EXTRACT1:.*]] = extractvalue %struct.LargeStructTwoMember %u.coerce, 1 -// AMDGCN: store [20 x <2 x i32>] %[[EXTRACT1]], ptr addrspace(5) %[[U_PTR1]] -// AMDGCN: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref(%struct.LargeStructTwoMember) align 8 %[[u]]) +// +// X86-LABEL: define spir_kernel void @KernelLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { FuncLargeTwoMember(u); } +//. +// X86: [[META4]] = !{i32 1, i32 1} +// X86: [[META5]] = !{!"none", !"none"} +// X86: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// X86: [[META7]] = !{!"", !""} +// X86: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// X86: [[META9]] = !{} +// X86: [[META10]] = !{i32 0} +// X86: [[META11]] = !{!"none"} +// X86: [[META12]] = !{!"struct StructOneMember"} +// X86: [[META13]] = !{!""} +// X86: [[META14]] = !{i32 1} +// X86: [[META15]] = !{!"struct StructOneMember*"} +// X86: [[META16]] = !{!"struct LargeStructOneMember"} +// X86: [[META17]] = !{!"struct StructTwoMember"} +// X86: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN: [[META4]] = !{i32 1, i32 1} +// AMDGCN: [[META5]] = !{!"none", !"none"} +// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN: [[META7]] = !{!"", !""} +// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN: [[META9]] = !{} +// AMDGCN: [[META10]] = !{i32 0} +// AMDGCN: [[META11]] = !{!"none"} +// AMDGCN: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN: [[META13]] = !{!""} +// AMDGCN: [[META14]] = !{i32 1} +// AMDGCN: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN20: [[META4]] = !{i32 1, i32 1} +// AMDGCN20: [[META5]] = !{!"none", !"none"} +// AMDGCN20: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN20: [[META7]] = !{!"", !""} +// AMDGCN20: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN20: [[META9]] = !{} +// AMDGCN20: [[META10]] = !{i32 0} +// AMDGCN20: [[META11]] = !{!"none"} +// AMDGCN20: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN20: [[META13]] = !{!""} +// AMDGCN20: [[META14]] = !{i32 1} +// AMDGCN20: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN20: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN20: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN20: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// SPIR: [[META3]] = !{i32 1, i32 1} +// SPIR: [[META4]] = !{!"none", !"none"} +// SPIR: [[META5]] = !{!"Mat3X3*", !"Mat4X4*"} +// SPIR: [[META6]] = !{!"", !""} +// SPIR: [[META7]] = !{!"Mat32X32*", !"Mat64X64*"} +// SPIR: [[META8]] = !{} +// SPIR: [[META9]] = !{i32 0} +// SPIR: [[META10]] = !{!"none"} +// SPIR: [[META11]] = !{!"struct StructOneMember"} +// SPIR: [[META12]] = !{!""} +// SPIR: [[META13]] = !{i32 1} +// SPIR: [[META14]] = !{!"struct StructOneMember*"} +// SPIR: [[META15]] = !{!"struct LargeStructOneMember"} +// SPIR: [[META16]] = !{!"struct StructTwoMember"} +// SPIR: [[META17]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN30-GVAR: [[META4]] = !{i32 1, i32 1} +// AMDGCN30-GVAR: [[META5]] = !{!"none", !"none"} +// AMDGCN30-GVAR: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN30-GVAR: [[META7]] = !{!"", !""} +// AMDGCN30-GVAR: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN30-GVAR: [[META9]] = !{} +// AMDGCN30-GVAR: [[META10]] = !{i32 0} +// AMDGCN30-GVAR: [[META11]] = !{!"none"} +// AMDGCN30-GVAR: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN30-GVAR: [[META13]] = !{!""} +// AMDGCN30-GVAR: [[META14]] = !{i32 1} +// AMDGCN30-GVAR: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN30-GVAR: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN30-GVAR: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN30-GVAR: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN30: [[META4]] = !{i32 1, i32 1} +// AMDGCN30: [[META5]] = !{!"none", !"none"} +// AMDGCN30: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN30: [[META7]] = !{!"", !""} +// AMDGCN30: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN30: [[META9]] = !{} +// AMDGCN30: [[META10]] = !{i32 0} +// AMDGCN30: [[META11]] = !{!"none"} +// AMDGCN30: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN30: [[META13]] = !{!""} +// AMDGCN30: [[META14]] = !{i32 1} +// AMDGCN30: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN30: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN30: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN30: [[META18]] = !{!"struct LargeStructTwoMember"} +//. diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl index f26495bc44aa3..c847f5850b223 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl @@ -1,67 +1,107 @@ -// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s -// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck -check-prefixes=CL12 %s +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck -check-prefixes=CL20 %s -// CL12-LABEL: define{{.*}} void @func1(ptr addrspace(5) noundef %x) -// CL20-LABEL: define{{.*}} void @func1(ptr noundef %x) +// CL12-LABEL: define dso_local void @func1( +// CL12-SAME: ptr addrspace(5) noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// CL12-NEXT: store ptr addrspace(5) [[X]], ptr addrspace(5) [[X_ADDR]], align 4 +// CL12-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[X_ADDR]], align 4 +// CL12-NEXT: store i32 1, ptr addrspace(5) [[TMP0]], align 4 +// CL12-NEXT: ret void +// +// CL20-LABEL: define dso_local void @func1( +// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CL20-NEXT: [[ENTRY:.*:]] +// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CL20-NEXT: store ptr [[X]], ptr addrspace(5) [[X_ADDR]], align 8 +// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[X_ADDR]], align 8 +// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CL20-NEXT: ret void +// void func1(int *x) { - // CL12: %[[x_addr:.*]] = alloca ptr addrspace(5){{.*}}addrspace(5) - // CL12: store ptr addrspace(5) %x, ptr addrspace(5) %[[x_addr]] - // CL12: %[[r0:.*]] = load ptr addrspace(5), ptr addrspace(5) %[[x_addr]] - // CL12: store i32 1, ptr addrspace(5) %[[r0]] - // CL20: %[[x_addr:.*]] = alloca ptr{{.*}}addrspace(5) - // CL20: store ptr %x, ptr addrspace(5) %[[x_addr]] - // CL20: %[[r0:.*]] = load ptr, ptr addrspace(5) %[[x_addr]] - // CL20: store i32 1, ptr %[[r0]] *x = 1; } -// CHECK-LABEL: define{{.*}} void @func2() +// CL12-LABEL: define dso_local void @func2( +// CL12-SAME: ) #[[ATTR0]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5) +// CL12-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5) +// CL12-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5) +// CL12-NEXT: [[LP1:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// CL12-NEXT: [[LP2:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// CL12-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CL12-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4 +// CL12-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4 +// CL12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL12-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4 +// CL12-NEXT: store ptr addrspace(5) [[LV1]], ptr addrspace(5) [[LP1]], align 4 +// CL12-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL12-NEXT: store ptr addrspace(5) [[ARRAYDECAY]], ptr addrspace(5) [[LP2]], align 4 +// CL12-NEXT: call void @func1(ptr addrspace(5) noundef [[LV1]]) #[[ATTR2:[0-9]+]] +// CL12-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4 +// CL12-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4 +// CL12-NEXT: ret void +// +// CL20-LABEL: define dso_local void @func2( +// CL20-SAME: ) #[[ATTR0]] { +// CL20-NEXT: [[ENTRY:.*:]] +// CL20-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5) +// CL20-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5) +// CL20-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5) +// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5) +// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5) +// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4 +// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4 +// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4 +// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr +// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8 +// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr +// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8 +// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr +// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]] +// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4 +// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4 +// CL20-NEXT: ret void +// void func2(void) { - // CHECK: %lv1 = alloca i32, align 4, addrspace(5) - // CHECK: %lv2 = alloca i32, align 4, addrspace(5) - // CHECK: %la = alloca [100 x i32], align 4, addrspace(5) - // CL12: %lp1 = alloca ptr addrspace(5), align 4, addrspace(5) - // CL12: %lp2 = alloca ptr addrspace(5), align 4, addrspace(5) - // CL20: %lp1 = alloca ptr, align 8, addrspace(5) - // CL20: %lp2 = alloca ptr, align 8, addrspace(5) - // CHECK: %lvc = alloca i32, align 4, addrspace(5) - - // CHECK: store i32 1, ptr addrspace(5) %lv1 int lv1; lv1 = 1; - // CHECK: store i32 2, ptr addrspace(5) %lv2 + int lv2 = 2; - // CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0 - // CHECK: store i32 3, ptr addrspace(5) %[[arrayidx]], align 4 int la[100]; la[0] = 3; - // CL12: store ptr addrspace(5) %lv1, ptr addrspace(5) %lp1, align 4 - // CL20: %[[r0:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr - // CL20: store ptr %[[r0]], ptr addrspace(5) %lp1, align 8 int *lp1 = &lv1; - // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0 - // CL12: store ptr addrspace(5) %[[arraydecay]], ptr addrspace(5) %lp2, align 4 - // CL20: %[[r1:.*]] = addrspacecast ptr addrspace(5) %[[arraydecay]] to ptr - // CL20: store ptr %[[r1]], ptr addrspace(5) %lp2, align 8 int *lp2 = la; - // CL12: call void @func1(ptr addrspace(5) noundef %lv1) - // CL20: %[[r2:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr - // CL20: call void @func1(ptr noundef %[[r2]]) func1(&lv1); - // CHECK: store i32 4, ptr addrspace(5) %lvc - // CHECK: store i32 4, ptr addrspace(5) %lv1 const int lvc = 4; lv1 = lvc; } -// CHECK-LABEL: define{{.*}} void @func3() -// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5) -// CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 %a, i8 0, i64 64, i1 false) +// CL12-LABEL: define dso_local void @func3( +// CL12-SAME: ) #[[ATTR0]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5) +// CL12-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false) +// CL12-NEXT: ret void +// +// CL20-LABEL: define dso_local void @func3( +// CL20-SAME: ) #[[ATTR0]] { +// CL20-NEXT: [[ENTRY:.*:]] +// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5) +// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false) +// CL20-NEXT: ret void +// void func3(void) { float a[16][1] = {{0.}}; }