From ca9196266653dd796abfad9b4030090fc299eb8a Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 15 Oct 2024 22:49:17 +0100 Subject: [PATCH 1/7] `private` is a terrible default AS, and is terrible and wrong for OCL 2.0; fix the latter. --- clang/lib/Basic/Targets/AMDGPU.cpp | 6 +- clang/lib/CodeGen/CGBlocks.cpp | 3 +- clang/lib/CodeGen/CGBuiltin.cpp | 5 +- .../CodeGenOpenCL/addr-space-struct-arg.cl | 1521 ++++++++++++++++- .../amdgcn-automatic-variable.cl | 126 +- .../amdgpu-abi-struct-arg-byref.cl | 282 +-- .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 495 +++--- clang/test/CodeGenOpenCL/amdgpu-nullptr.cl | 28 +- clang/test/CodeGenOpenCL/atomic-ops.cl | 1027 ++++++++--- .../atomics-unsafe-hw-remarks-gfx90a.cl | 6 +- clang/test/CodeGenOpenCL/blocks.cl | 23 +- clang/test/CodeGenOpenCL/builtins-alloca.cl | 432 ++++- .../CodeGenOpenCL/builtins-amdgcn-gfx12.cl | 155 +- .../CodeGenOpenCL/builtins-amdgcn-gfx940.cl | 30 +- .../builtins-fp-atomics-gfx12.cl | 4 +- .../CodeGenOpenCL/builtins-fp-atomics-gfx8.cl | 2 +- .../builtins-fp-atomics-gfx90a.cl | 2 +- .../enqueue-kernel-non-entry-block.cl | 2 +- clang/test/CodeGenOpenCL/opencl_types.cl | 2 +- clang/test/Index/pipe-size.cl | 4 +- 20 files changed, 3355 insertions(+), 800 deletions(-) diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 3b748d0249d57..078819183afda 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -260,9 +260,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { TargetInfo::adjust(Diags, Opts); // ToDo: There are still a few places using default address space as private - // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL - // can be removed from the following line. - setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || + // address space in OpenCL, which needs to be cleaned up, then the references + // to OpenCL can be removed from the following line. + setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) || !isAMDGCN(getTriple())); } diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 684fda7440731..c3a266285011f 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1397,7 +1397,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable( *BlockInfo, D->getName(), argNum, - cast(alloc.getPointer()), Builder); + cast(alloc.getPointer()->stripPointerCasts()), + Builder); } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 157e743a39bfb..1b01484a46cf7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5858,7 +5858,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); llvm::Value *TmpSize = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), + TmpPtr->stripPointerCasts()); llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -5903,7 +5904,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto Call = RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); if (TmpSize) - EmitLifetimeEnd(TmpSize, TmpPtr); + EmitLifetimeEnd(TmpSize, TmpPtr->stripPointerCasts()); return Call; } // Any calls now have event arguments passed. diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index bab0e21067eea..cb26fc6e8fcab 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -1,9 +1,10 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=ALL,X86 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --version 5 +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=X86 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN20 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -check-prefixes=SPIR %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -check-prefixes=AMDGCN30 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN30-GVAR %s typedef int int2 __attribute__((ext_vector_type(2))); @@ -45,147 +46,1505 @@ struct LargeStructTwoMember { struct LargeStructOneMember g_s; #endif -// X86-LABEL: define{{.*}} void @foo(ptr dead_on_unwind noalias writable sret(%struct.Mat4X4) align 4 %agg.result, ptr noundef byval(%struct.Mat3X3) align 4 %in) -// AMDGCN-LABEL: define{{.*}} %struct.Mat4X4 @foo([9 x i32] %in.coerce) Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { Mat4X4 out; return out; } -// ALL-LABEL: define {{.*}} void @ker -// Expect two mem copies: one for the argument "in", and one for -// the return value. -// X86: call void @llvm.memcpy.p0.p1.i32(ptr -// X86: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) - -// AMDGCN: load [9 x i32], ptr addrspace(1) -// AMDGCN: call %struct.Mat4X4 @foo([9 x i32] -// AMDGCN: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { out[0] = foo(in[1]); } -// X86-LABEL: define{{.*}} void @foo_large(ptr dead_on_unwind noalias writable sret(%struct.Mat64X64) align 4 %agg.result, ptr noundef byval(%struct.Mat32X32) align 4 %in) -// AMDGCN-LABEL: define{{.*}} void @foo_large(ptr addrspace(5) dead_on_unwind noalias writable sret(%struct.Mat64X64) align 4 %agg.result, ptr addrspace(5) noundef byref(%struct.Mat32X32) align 4 %{{.*}} -// AMDGCN: %in = alloca %struct.Mat32X32, align 4, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 %in, ptr addrspace(5) align 4 %{{.*}}, i64 4096, i1 false) Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { Mat64X64 out; return out; } -// ALL-LABEL: define {{.*}} void @ker_large -// Expect two mem copies: one for the argument "in", and one for -// the return value. -// X86: call void @llvm.memcpy.p0.p1.i32(ptr -// X86: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) -// AMDGCN: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) -// AMDGCN: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { out[0] = foo_large(in[1]); } -// AMDGCN-LABEL: define{{.*}} void @FuncOneMember(<2 x i32> %u.coerce) void FuncOneMember(struct StructOneMember u) { u.x = (int2)(0, 0); } -// AMDGCN-LABEL: define{{.*}} void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %{{.*}} -// AMDGCN: %u = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 %u, ptr addrspace(5) align 8 %{{.*}}, i64 800, i1 false) -// AMDGCN-NOT: addrspacecast -// AMDGCN: store <2 x i32> %{{.*}}, ptr addrspace(5) void FuncOneLargeMember(struct LargeStructOneMember u) { u.x[0] = (int2)(0, 0); } -// AMDGCN20-LABEL: define{{.*}} void @test_indirect_arg_globl() -// AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN20: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 %[[byval_temp]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) -// AMDGCN20: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables)) void test_indirect_arg_globl(void) { FuncOneLargeMember(g_s); } #endif -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @test_indirect_arg_local() -// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 %[[byval_temp]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) kernel void test_indirect_arg_local(void) { local struct LargeStructOneMember l_s; FuncOneLargeMember(l_s); } -// AMDGCN-LABEL: define{{.*}} void @test_indirect_arg_private() -// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN-NOT: @llvm.memcpy -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[p_s]]) void test_indirect_arg_private(void) { struct LargeStructOneMember p_s; FuncOneLargeMember(p_s); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelOneMember -// AMDGCN-SAME: (<2 x i32> %[[u_coerce:.*]]) -// AMDGCN: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5) -// AMDGCN: %[[coerce_dive:.*]] = getelementptr inbounds nuw %struct.StructOneMember, ptr addrspace(5) %[[u]], i32 0, i32 0 -// AMDGCN: store <2 x i32> %[[u_coerce]], ptr addrspace(5) %[[coerce_dive]] -// AMDGCN: call void @FuncOneMember(<2 x i32> kernel void KernelOneMember(struct StructOneMember u) { FuncOneMember(u); } -// SPIR: call void @llvm.memcpy.p0.p1.i32 -// SPIR-NOT: addrspacecast kernel void KernelOneMemberSpir(global struct StructOneMember* u) { FuncOneMember(*u); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) -// AMDGCN: %[[U_ELEM:.*]] = getelementptr inbounds nuw %struct.LargeStructOneMember, ptr addrspace(5) %[[U]], i32 0, i32 0 -// AMDGCN: %[[EXTRACT:.*]] = extractvalue %struct.LargeStructOneMember %u.coerce, 0 -// AMDGCN: store [100 x <2 x i32>] %[[EXTRACT]], ptr addrspace(5) %[[U_ELEM]], align 8 -// AMDGCN: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[U]]) kernel void KernelLargeOneMember(struct LargeStructOneMember u) { FuncOneLargeMember(u); } -// AMDGCN-LABEL: define{{.*}} void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1) void FuncTwoMember(struct StructTwoMember u) { u.y = (int2)(0, 0); } -// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember -// AMDGCN-SAME: (ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) -// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5) -// AMDGCN: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 %[[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) void FuncLargeTwoMember(struct LargeStructTwoMember u) { u.y[0] = (int2)(0, 0); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelTwoMember -// AMDGCN-SAME: (%struct.StructTwoMember %[[u_coerce:.*]]) -// AMDGCN: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5) -// AMDGCN: %[[LD0:.*]] = load <2 x i32>, ptr addrspace(5) -// AMDGCN: %[[LD1:.*]] = load <2 x i32>, ptr addrspace(5) -// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]]) kernel void KernelTwoMember(struct StructTwoMember u) { FuncTwoMember(u); } -// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeTwoMember -// AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]]) -// AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5) -// AMDGCN: %[[U_PTR0:.*]] = getelementptr inbounds nuw %struct.LargeStructTwoMember, ptr addrspace(5) %[[u]], i32 0, i32 0 -// AMDGCN: %[[EXTRACT0:.*]] = extractvalue %struct.LargeStructTwoMember %u.coerce, 0 -// AMDGCN: store [40 x <2 x i32>] %[[EXTRACT0]], ptr addrspace(5) %[[U_PTR0]] -// AMDGCN: %[[U_PTR1:.*]] = getelementptr inbounds nuw %struct.LargeStructTwoMember, ptr addrspace(5) %[[u]], i32 0, i32 1 -// AMDGCN: %[[EXTRACT1:.*]] = extractvalue %struct.LargeStructTwoMember %u.coerce, 1 -// AMDGCN: store [20 x <2 x i32>] %[[EXTRACT1]], ptr addrspace(5) %[[U_PTR1]] -// AMDGCN: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref(%struct.LargeStructTwoMember) align 8 %[[u]]) kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { FuncLargeTwoMember(u); } +//. +// X86: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 +//. +// AMDGCN: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 +// AMDGCN: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +//. +// AMDGCN20: @g_s = addrspace(1) global %struct.LargeStructOneMember zeroinitializer, align 8 +// AMDGCN20: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 +// AMDGCN20: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +//. +// SPIR: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 +//. +// AMDGCN30: @g_s = addrspace(1) global %struct.LargeStructOneMember zeroinitializer, align 8 +// AMDGCN30: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 +// AMDGCN30: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +//. +// AMDGCN30-GVAR: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 +// AMDGCN30-GVAR: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +//. +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @foo( +// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @ker( +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 +// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 +// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) +// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] +// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @foo_large( +// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @ker_large( +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 +// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 +// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) +// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncOneLargeMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @test_indirect_arg_local( +// X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @test_indirect_arg_private( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelOneMemberSpir( +// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4 +// X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelLargeOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @foo_large( +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0 +// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 +// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4 +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false) +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local void @foo_large( +// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local void @FuncOneMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl( +// AMDGCN20-SAME: ) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN20-SAME: ) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 +// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +// AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false) +// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_func void @foo( +// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @ker( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) +// SPIR-NEXT: call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] +// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_func void @foo_large( +// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @ker_large( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) +// SPIR-NEXT: call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_func void @FuncOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local( +// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private( +// SPIR-SAME: ) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir( +// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) +// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_func void @FuncTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[Y]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN30-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN30-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN30-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN30-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local void @foo_large( +// AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN30-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local void @FuncOneMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_globl( +// AMDGCN30-SAME: ) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN30-SAME: ) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN30-GVAR-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN30-GVAR-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN30-GVAR-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local void @foo_large( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN30-GVAR-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-GVAR-NEXT: ret void +// +//. +// X86: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +// X86: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" "uniform-work-group-size"="true" } +// X86: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// X86: attributes #[[ATTR3]] = { convergent nounwind } +//. +// AMDGCN: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// AMDGCN: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// AMDGCN: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// AMDGCN: attributes #[[ATTR3]] = { convergent nounwind } +//. +// AMDGCN20: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// AMDGCN20: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +// AMDGCN20: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// AMDGCN20: attributes #[[ATTR3]] = { convergent nounwind } +//. +// SPIR: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// SPIR: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// SPIR: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// SPIR: attributes #[[ATTR3]] = { convergent nounwind } +//. +// AMDGCN30: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// AMDGCN30: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +// AMDGCN30: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// AMDGCN30: attributes #[[ATTR3]] = { convergent nounwind } +//. +// AMDGCN30-GVAR: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// AMDGCN30-GVAR: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +// AMDGCN30-GVAR: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// AMDGCN30-GVAR: attributes #[[ATTR3]] = { convergent nounwind } +//. +// X86: [[META0:![0-9]+]] = !{i32 1, !"NumRegisterParameters", i32 0} +// X86: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// X86: [[META2:![0-9]+]] = !{i32 1, i32 2} +// X86: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// X86: [[META4]] = !{i32 1, i32 1} +// X86: [[META5]] = !{!"none", !"none"} +// X86: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// X86: [[META7]] = !{!"", !""} +// X86: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// X86: [[META9]] = !{} +// X86: [[META10]] = !{i32 0} +// X86: [[META11]] = !{!"none"} +// X86: [[META12]] = !{!"struct StructOneMember"} +// X86: [[META13]] = !{!""} +// X86: [[META14]] = !{i32 1} +// X86: [[META15]] = !{!"struct StructOneMember*"} +// X86: [[META16]] = !{!"struct LargeStructOneMember"} +// X86: [[META17]] = !{!"struct StructTwoMember"} +// X86: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// AMDGCN: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// AMDGCN: [[META2:![0-9]+]] = !{i32 1, i32 2} +// AMDGCN: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// AMDGCN: [[META4]] = !{i32 1, i32 1} +// AMDGCN: [[META5]] = !{!"none", !"none"} +// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN: [[META7]] = !{!"", !""} +// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN: [[META9]] = !{} +// AMDGCN: [[META10]] = !{i32 0} +// AMDGCN: [[META11]] = !{!"none"} +// AMDGCN: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN: [[META13]] = !{!""} +// AMDGCN: [[META14]] = !{i32 1} +// AMDGCN: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN20: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// AMDGCN20: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// AMDGCN20: [[META2:![0-9]+]] = !{i32 2, i32 0} +// AMDGCN20: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// AMDGCN20: [[META4]] = !{i32 1, i32 1} +// AMDGCN20: [[META5]] = !{!"none", !"none"} +// AMDGCN20: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN20: [[META7]] = !{!"", !""} +// AMDGCN20: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN20: [[META9]] = !{} +// AMDGCN20: [[META10]] = !{i32 0} +// AMDGCN20: [[META11]] = !{!"none"} +// AMDGCN20: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN20: [[META13]] = !{!""} +// AMDGCN20: [[META14]] = !{i32 1} +// AMDGCN20: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN20: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN20: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN20: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// SPIR: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// SPIR: [[META1:![0-9]+]] = !{i32 1, i32 2} +// SPIR: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// SPIR: [[META3]] = !{i32 1, i32 1} +// SPIR: [[META4]] = !{!"none", !"none"} +// SPIR: [[META5]] = !{!"Mat3X3*", !"Mat4X4*"} +// SPIR: [[META6]] = !{!"", !""} +// SPIR: [[META7]] = !{!"Mat32X32*", !"Mat64X64*"} +// SPIR: [[META8]] = !{} +// SPIR: [[META9]] = !{i32 0} +// SPIR: [[META10]] = !{!"none"} +// SPIR: [[META11]] = !{!"struct StructOneMember"} +// SPIR: [[META12]] = !{!""} +// SPIR: [[META13]] = !{i32 1} +// SPIR: [[META14]] = !{!"struct StructOneMember*"} +// SPIR: [[META15]] = !{!"struct LargeStructOneMember"} +// SPIR: [[META16]] = !{!"struct StructTwoMember"} +// SPIR: [[META17]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN30: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// AMDGCN30: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// AMDGCN30: [[META2:![0-9]+]] = !{i32 3, i32 0} +// AMDGCN30: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// AMDGCN30: [[META4]] = !{i32 1, i32 1} +// AMDGCN30: [[META5]] = !{!"none", !"none"} +// AMDGCN30: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN30: [[META7]] = !{!"", !""} +// AMDGCN30: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN30: [[META9]] = !{} +// AMDGCN30: [[META10]] = !{i32 0} +// AMDGCN30: [[META11]] = !{!"none"} +// AMDGCN30: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN30: [[META13]] = !{!""} +// AMDGCN30: [[META14]] = !{i32 1} +// AMDGCN30: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN30: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN30: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN30: [[META18]] = !{!"struct LargeStructTwoMember"} +//. +// AMDGCN30-GVAR: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// AMDGCN30-GVAR: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// AMDGCN30-GVAR: [[META2:![0-9]+]] = !{i32 3, i32 0} +// AMDGCN30-GVAR: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// AMDGCN30-GVAR: [[META4]] = !{i32 1, i32 1} +// AMDGCN30-GVAR: [[META5]] = !{!"none", !"none"} +// AMDGCN30-GVAR: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN30-GVAR: [[META7]] = !{!"", !""} +// AMDGCN30-GVAR: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN30-GVAR: [[META9]] = !{} +// AMDGCN30-GVAR: [[META10]] = !{i32 0} +// AMDGCN30-GVAR: [[META11]] = !{!"none"} +// AMDGCN30-GVAR: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN30-GVAR: [[META13]] = !{!""} +// AMDGCN30-GVAR: [[META14]] = !{i32 1} +// AMDGCN30-GVAR: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN30-GVAR: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN30-GVAR: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN30-GVAR: [[META18]] = !{!"struct LargeStructTwoMember"} +//. diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl index f26495bc44aa3..1065d7c3b0c90 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl @@ -1,67 +1,111 @@ -// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s -// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CL12 %s +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CL20 %s -// CL12-LABEL: define{{.*}} void @func1(ptr addrspace(5) noundef %x) -// CL20-LABEL: define{{.*}} void @func1(ptr noundef %x) +// CL12-LABEL: define dso_local void @func1( +// CL12-SAME: ptr addrspace(5) noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// CL12-NEXT: store ptr addrspace(5) [[X]], ptr addrspace(5) [[X_ADDR]], align 4 +// CL12-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[X_ADDR]], align 4 +// CL12-NEXT: store i32 1, ptr addrspace(5) [[TMP0]], align 4 +// CL12-NEXT: ret void +// +// CL20-LABEL: define dso_local void @func1( +// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CL20-NEXT: [[ENTRY:.*:]] +// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 +// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CL20-NEXT: ret void +// void func1(int *x) { - // CL12: %[[x_addr:.*]] = alloca ptr addrspace(5){{.*}}addrspace(5) - // CL12: store ptr addrspace(5) %x, ptr addrspace(5) %[[x_addr]] - // CL12: %[[r0:.*]] = load ptr addrspace(5), ptr addrspace(5) %[[x_addr]] - // CL12: store i32 1, ptr addrspace(5) %[[r0]] - // CL20: %[[x_addr:.*]] = alloca ptr{{.*}}addrspace(5) - // CL20: store ptr %x, ptr addrspace(5) %[[x_addr]] - // CL20: %[[r0:.*]] = load ptr, ptr addrspace(5) %[[x_addr]] - // CL20: store i32 1, ptr %[[r0]] *x = 1; } -// CHECK-LABEL: define{{.*}} void @func2() +// CL12-LABEL: define dso_local void @func2( +// CL12-SAME: ) #[[ATTR0]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5) +// CL12-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5) +// CL12-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5) +// CL12-NEXT: [[LP1:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// CL12-NEXT: [[LP2:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// CL12-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CL12-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4 +// CL12-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4 +// CL12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL12-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4 +// CL12-NEXT: store ptr addrspace(5) [[LV1]], ptr addrspace(5) [[LP1]], align 4 +// CL12-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL12-NEXT: store ptr addrspace(5) [[ARRAYDECAY]], ptr addrspace(5) [[LP2]], align 4 +// CL12-NEXT: call void @func1(ptr addrspace(5) noundef [[LV1]]) #[[ATTR2:[0-9]+]] +// CL12-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4 +// CL12-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4 +// CL12-NEXT: ret void +// +// CL20-LABEL: define dso_local void @func2( +// CL20-SAME: ) #[[ATTR0]] { +// CL20-NEXT: [[ENTRY:.*:]] +// CL20-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5) +// CL20-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5) +// CL20-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5) +// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5) +// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5) +// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr +// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr +// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr +// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr +// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr +// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr +// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4 +// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4 +// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 +// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4 +// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8 +// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 +// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8 +// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]] +// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4 +// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4 +// CL20-NEXT: ret void +// void func2(void) { - // CHECK: %lv1 = alloca i32, align 4, addrspace(5) - // CHECK: %lv2 = alloca i32, align 4, addrspace(5) - // CHECK: %la = alloca [100 x i32], align 4, addrspace(5) - // CL12: %lp1 = alloca ptr addrspace(5), align 4, addrspace(5) - // CL12: %lp2 = alloca ptr addrspace(5), align 4, addrspace(5) - // CL20: %lp1 = alloca ptr, align 8, addrspace(5) - // CL20: %lp2 = alloca ptr, align 8, addrspace(5) - // CHECK: %lvc = alloca i32, align 4, addrspace(5) - - // CHECK: store i32 1, ptr addrspace(5) %lv1 int lv1; lv1 = 1; - // CHECK: store i32 2, ptr addrspace(5) %lv2 int lv2 = 2; - // CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0 - // CHECK: store i32 3, ptr addrspace(5) %[[arrayidx]], align 4 int la[100]; la[0] = 3; - // CL12: store ptr addrspace(5) %lv1, ptr addrspace(5) %lp1, align 4 - // CL20: %[[r0:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr - // CL20: store ptr %[[r0]], ptr addrspace(5) %lp1, align 8 int *lp1 = &lv1; - // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0 - // CL12: store ptr addrspace(5) %[[arraydecay]], ptr addrspace(5) %lp2, align 4 - // CL20: %[[r1:.*]] = addrspacecast ptr addrspace(5) %[[arraydecay]] to ptr - // CL20: store ptr %[[r1]], ptr addrspace(5) %lp2, align 8 int *lp2 = la; - // CL12: call void @func1(ptr addrspace(5) noundef %lv1) - // CL20: %[[r2:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr - // CL20: call void @func1(ptr noundef %[[r2]]) func1(&lv1); - // CHECK: store i32 4, ptr addrspace(5) %lvc - // CHECK: store i32 4, ptr addrspace(5) %lv1 const int lvc = 4; lv1 = lvc; } -// CHECK-LABEL: define{{.*}} void @func3() -// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5) -// CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 %a, i8 0, i64 64, i1 false) +// CL12-LABEL: define dso_local void @func3( +// CL12-SAME: ) #[[ATTR0]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5) +// CL12-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false) +// CL12-NEXT: ret void +// +// CL20-LABEL: define dso_local void @func3( +// CL20-SAME: ) #[[ATTR0]] { +// CL20-NEXT: [[ENTRY:.*:]] +// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5) +// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false) +// CL20-NEXT: ret void +// void func3(void) { float a[16][1] = {{0.}}; } diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl index a5f682646d338..084281a8cada4 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s typedef int int2 __attribute__((ext_vector_type(2))); @@ -42,14 +42,16 @@ struct LargeStructOneMember g_s; #endif -// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo -// AMDGCN-SAME: ([9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// AMDGCN-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0 +// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 // AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] // Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { @@ -60,36 +62,40 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // Expect two mem copies: one for the argument "in", and one for // the return value. -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker -// AMDGCN-SAME: (ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 // AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4 +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false) // AMDGCN-NEXT: ret void // kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { out[0] = foo(in[1]); } -// AMDGCN-LABEL: define dso_local void @foo_large -// AMDGCN-SAME: (ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: entry: -// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN-LABEL: define dso_local void @foo_large( +// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) // AMDGCN-NEXT: ret void // Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { @@ -97,56 +103,63 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { return out; } -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large -// AMDGCN-SAME: (ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { out[0] = foo_large(in[1]); } -// AMDGCN-LABEL: define dso_local void @FuncOneMember -// AMDGCN-SAME: (<2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local void @FuncOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 // AMDGCN-NEXT: ret void // void FuncOneMember(struct StructOneMember u) { u.x = (int2)(0, 0); } -// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember -// AMDGCN-SAME: (ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: entry: -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // void FuncOneLargeMember(struct LargeStructOneMember u) { @@ -154,9 +167,9 @@ void FuncOneLargeMember(struct LargeStructOneMember u) { } #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables)) -// AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl -// AMDGCN-SAME: () #[[ATTR0]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] @@ -167,9 +180,9 @@ void test_indirect_arg_globl(void) { } #endif -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local -// AMDGCN-SAME: () #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] @@ -180,11 +193,14 @@ kernel void test_indirect_arg_local(void) { FuncOneLargeMember(l_s); } -// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private -// AMDGCN-SAME: () #[[ATTR0]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN-NEXT: ret void // void test_indirect_arg_private(void) { @@ -192,14 +208,15 @@ void test_indirect_arg_private(void) { FuncOneLargeMember(p_s); } -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember -// AMDGCN-SAME: (<2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 // AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] // AMDGCN-NEXT: ret void // @@ -207,12 +224,13 @@ kernel void KernelOneMember(struct StructOneMember u) { FuncOneMember(u); } -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir -// AMDGCN-SAME: (ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr +// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] @@ -222,70 +240,78 @@ kernel void KernelOneMemberSpir(global struct StructOneMember* u) { FuncOneMember(*u); } -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember -// AMDGCN-SAME: ([[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN-NEXT: ret void // kernel void KernelLargeOneMember(struct LargeStructOneMember u) { FuncOneLargeMember(u); } -// AMDGCN-LABEL: define dso_local void @FuncTwoMember -// AMDGCN-SAME: (<2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8 // AMDGCN-NEXT: ret void // void FuncTwoMember(struct StructTwoMember u) { u.y = (int2)(0, 0); } -// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember -// AMDGCN-SAME: (ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: entry: -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // void FuncLargeTwoMember(struct LargeStructTwoMember u) { u.y[0] = (int2)(0, 0); } -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember -// AMDGCN-SAME: ([[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 // AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] // AMDGCN-NEXT: ret void // @@ -293,19 +319,39 @@ kernel void KernelTwoMember(struct StructTwoMember u) { FuncTwoMember(u); } -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember -// AMDGCN-SAME: ([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: entry: +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false) +// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN-NEXT: ret void // kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { FuncLargeTwoMember(u); } +//. +// AMDGCN: [[META4]] = !{i32 1, i32 1} +// AMDGCN: [[META5]] = !{!"none", !"none"} +// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN: [[META7]] = !{!"", !""} +// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN: [[META9]] = !{} +// AMDGCN: [[META10]] = !{i32 0} +// AMDGCN: [[META11]] = !{!"none"} +// AMDGCN: [[META12]] = !{!"struct StructOneMember"} +// AMDGCN: [[META13]] = !{!""} +// AMDGCN: [[META14]] = !{i32 1} +// AMDGCN: [[META15]] = !{!"struct StructOneMember*"} +// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"} +// AMDGCN: [[META17]] = !{!"struct StructTwoMember"} +// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"} +//. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 08b28f1814c1f..2fc7f9a24b887 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -70,11 +70,13 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: entry: // NOCPU-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) // NOCPU-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// NOCPU-NEXT: store i64 [[ID]], ptr addrspace(5) [[ID_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8 -// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8 +// NOCPU-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr +// NOCPU-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// NOCPU-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]] // NOCPU-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8 // NOCPU-NEXT: ret void @@ -101,96 +103,108 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // NOCPU-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) -// NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 -// NOCPU-NEXT: store i8 [[B]], ptr addrspace(5) [[B_ADDR]], align 1 -// NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr addrspace(5) [[C_ADDR]], align 8 -// NOCPU-NEXT: store i64 [[D]], ptr addrspace(5) [[D_ADDR]], align 8 -// NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 -// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 -// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) -// NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 0 -// NOCPU-NEXT: store i32 25, ptr addrspace(5) [[BLOCK_SIZE]], align 8 -// NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 1 -// NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN]], align 4 -// NOCPU-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 2 -// NOCPU-NEXT: store ptr @__test_block_invoke, ptr addrspace(5) [[BLOCK_INVOKE]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 3 -// NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(5) [[BLOCK_CAPTURED]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 4 -// NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1 -// NOCPU-NEXT: store i8 [[TMP3]], ptr addrspace(5) [[BLOCK_CAPTURED1]], align 8 -// NOCPU-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr -// NOCPU-NEXT: [[TMP5:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_block_invoke_kernel, ptr [[TMP4]]) -// NOCPU-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 -// NOCPU-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP2]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) -// NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 0 -// NOCPU-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE4]], align 8 -// NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 1 -// NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN5]], align 4 -// NOCPU-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 2 -// NOCPU-NEXT: store ptr @__test_block_invoke_2, ptr addrspace(5) [[BLOCK_INVOKE6]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 3 -// NOCPU-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(1) [[TMP8]], ptr addrspace(5) [[BLOCK_CAPTURED7]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 6 -// NOCPU-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1 -// NOCPU-NEXT: store i8 [[TMP9]], ptr addrspace(5) [[BLOCK_CAPTURED8]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 4 -// NOCPU-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(1) [[TMP10]], ptr addrspace(5) [[BLOCK_CAPTURED9]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 5 -// NOCPU-NEXT: [[TMP11:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8 -// NOCPU-NEXT: store i64 [[TMP11]], ptr addrspace(5) [[BLOCK_CAPTURED10]], align 8 -// NOCPU-NEXT: [[TMP12:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr -// NOCPU-NEXT: [[TMP13:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP6]], i32 [[TMP7]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP2]], ptr @__test_block_invoke_2_kernel, ptr [[TMP12]]) -// NOCPU-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 -// NOCPU-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP11]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) -// NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 0 -// NOCPU-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE13]], align 8 -// NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 1 -// NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN14]], align 4 -// NOCPU-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 2 -// NOCPU-NEXT: store ptr @__test_block_invoke_3, ptr addrspace(5) [[BLOCK_INVOKE15]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 3 -// NOCPU-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(1) [[TMP16]], ptr addrspace(5) [[BLOCK_CAPTURED16]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 6 -// NOCPU-NEXT: [[TMP17:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1 -// NOCPU-NEXT: store i8 [[TMP17]], ptr addrspace(5) [[BLOCK_CAPTURED17]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 4 -// NOCPU-NEXT: [[TMP18:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(1) [[TMP18]], ptr addrspace(5) [[BLOCK_CAPTURED18]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 5 -// NOCPU-NEXT: [[TMP19:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8 -// NOCPU-NEXT: store i64 [[TMP19]], ptr addrspace(5) [[BLOCK_CAPTURED19]], align 8 -// NOCPU-NEXT: [[TMP20:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr -// NOCPU-NEXT: [[TMP21:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 -// NOCPU-NEXT: store i64 100, ptr addrspace(5) [[TMP21]], align 8 -// NOCPU-NEXT: [[TMP22:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP14]], i32 [[TMP15]], ptr addrspace(5) [[VARTMP11]], ptr @__test_block_invoke_3_kernel, ptr [[TMP20]], i32 1, ptr addrspace(5) [[TMP21]]) -// NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 0 -// NOCPU-NEXT: store i32 32, ptr addrspace(5) [[BLOCK_SIZE22]], align 8 -// NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 1 -// NOCPU-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN23]], align 4 -// NOCPU-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 2 -// NOCPU-NEXT: store ptr @__test_block_invoke_4, ptr addrspace(5) [[BLOCK_INVOKE24]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 3 -// NOCPU-NEXT: [[TMP23:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8 -// NOCPU-NEXT: store i64 [[TMP23]], ptr addrspace(5) [[BLOCK_CAPTURED25]], align 8 -// NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 4 -// NOCPU-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(1) [[TMP24]], ptr addrspace(5) [[BLOCK_CAPTURED26]], align 8 +// NOCPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr +// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr +// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr +// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// NOCPU-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr +// NOCPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr +// NOCPU-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr +// NOCPU-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr +// NOCPU-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr +// NOCPU-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr +// NOCPU-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr // NOCPU-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr -// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8 -// NOCPU-NEXT: [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 -// NOCPU-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP27]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) -// NOCPU-NEXT: [[TMP27:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8 -// NOCPU-NEXT: [[TMP28:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr -// NOCPU-NEXT: [[TMP29:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP25]], i32 [[TMP26]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP27]], ptr @__test_block_invoke_4_kernel, ptr [[TMP28]]) +// NOCPU-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr +// NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1 +// NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 +// NOCPU-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 +// NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1 +// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4 +// NOCPU-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2 +// NOCPU-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3 +// NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4 +// NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 +// NOCPU-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8 +// NOCPU-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]]) +// NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0 +// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 +// NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1 +// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4 +// NOCPU-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2 +// NOCPU-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3 +// NOCPU-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6 +// NOCPU-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 +// NOCPU-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4 +// NOCPU-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5 +// NOCPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8 +// NOCPU-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]]) +// NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 +// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 +// NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1 +// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4 +// NOCPU-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2 +// NOCPU-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3 +// NOCPU-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6 +// NOCPU-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 +// NOCPU-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4 +// NOCPU-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 +// NOCPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8 +// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 +// NOCPU-NEXT: store i64 100, ptr [[TMP18]], align 8 +// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) +// NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 +// NOCPU-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8 +// NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1 +// NOCPU-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4 +// NOCPU-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2 +// NOCPU-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3 +// NOCPU-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8 +// NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 +// NOCPU-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8 +// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]]) // NOCPU-NEXT: ret void // // @@ -200,8 +214,10 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: entry: // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 @@ -228,8 +244,10 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: entry: // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 @@ -263,9 +281,12 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 -// NOCPU-NEXT: store ptr addrspace(3) [[LP]], ptr addrspace(5) [[LP_ADDR]], align 4 -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// NOCPU-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr +// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4 +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 // NOCPU-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 @@ -278,7 +299,7 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8 // NOCPU-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 // NOCPU-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8 -// NOCPU-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[LP_ADDR]], align 4 +// NOCPU-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4 // NOCPU-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0 // NOCPU-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4 // NOCPU-NEXT: ret void @@ -301,8 +322,10 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: entry: // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 @@ -331,13 +354,18 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) // NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) // NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) -// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8 -// NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 +// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr +// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr +// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr +// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 // NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 -// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) -// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) +// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) // NOCPU-NEXT: ret void // // @@ -347,8 +375,10 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: entry: // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 -// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// NOCPU-NEXT: [[BLOCK_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_ADDR]] to ptr +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() // NOCPU-NEXT: ret void // @@ -383,11 +413,13 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: entry: // GFX900-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) // GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// GFX900-NEXT: store i64 [[ID]], ptr addrspace(5) [[ID_ADDR]], align 8, !tbaa [[TBAA3:![0-9]+]] -// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] -// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(5) [[ID_ADDR]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr +// GFX900-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// GFX900-NEXT: store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7:![0-9]+]] +// GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP1]], i64 [[TMP2]] // GFX900-NEXT: store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: ret void @@ -414,102 +446,114 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK20:%.*]] = alloca ptr, align 8, addrspace(5) // GFX900-NEXT: [[BLOCK21:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // GFX900-NEXT: [[VARTMP27:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) -// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store i8 [[B]], ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13:![0-9]+]] -// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store i64 [[D]], ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr +// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr +// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr +// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// GFX900-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr +// GFX900-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr +// GFX900-NEXT: [[BLOCK3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr +// GFX900-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr +// GFX900-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr +// GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr +// GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr +// GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr +// GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr +// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8:[0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] -// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14:![0-9]+]] +// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14:![0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] -// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16:![0-9]+]] -// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]] -// GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18:![0-9]+]] -// GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 0 -// GFX900-NEXT: store i32 25, ptr addrspace(5) [[BLOCK_SIZE]], align 8 -// GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 1 -// GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN]], align 4 -// GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 2 -// GFX900-NEXT: store ptr @__test_block_invoke, ptr addrspace(5) [[BLOCK_INVOKE]], align 8 -// GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 3 -// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(5) [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr addrspace(5) [[BLOCK]], i32 0, i32 4 -// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13]] -// GFX900-NEXT: store i8 [[TMP3]], ptr addrspace(5) [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA13]] -// GFX900-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr -// GFX900-NEXT: [[TMP5:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_block_invoke_kernel, ptr [[TMP4]]) -// GFX900-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]] -// GFX900-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]] -// GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP2]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] -// GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 0 -// GFX900-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE4]], align 8 -// GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 1 -// GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN5]], align 4 -// GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 2 -// GFX900-NEXT: store ptr @__test_block_invoke_2, ptr addrspace(5) [[BLOCK_INVOKE6]], align 8 -// GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 3 -// GFX900-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP8]], ptr addrspace(5) [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 6 -// GFX900-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13]] -// GFX900-NEXT: store i8 [[TMP9]], ptr addrspace(5) [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA13]] -// GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 4 -// GFX900-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP10]], ptr addrspace(5) [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK3]], i32 0, i32 5 -// GFX900-NEXT: [[TMP11:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: store i64 [[TMP11]], ptr addrspace(5) [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP12:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK3]] to ptr -// GFX900-NEXT: [[TMP13:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP6]], i32 [[TMP7]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP2]], ptr @__test_block_invoke_2_kernel, ptr [[TMP12]]) -// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]] -// GFX900-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]] -// GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP11]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] -// GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 0 -// GFX900-NEXT: store i32 41, ptr addrspace(5) [[BLOCK_SIZE13]], align 8 -// GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 1 -// GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN14]], align 4 -// GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 2 -// GFX900-NEXT: store ptr @__test_block_invoke_3, ptr addrspace(5) [[BLOCK_INVOKE15]], align 8 -// GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 3 -// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr addrspace(5) [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 6 -// GFX900-NEXT: [[TMP17:%.*]] = load i8, ptr addrspace(5) [[B_ADDR]], align 1, !tbaa [[TBAA13]] -// GFX900-NEXT: store i8 [[TMP17]], ptr addrspace(5) [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA13]] -// GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 4 -// GFX900-NEXT: [[TMP18:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP18]], ptr addrspace(5) [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr addrspace(5) [[BLOCK12]], i32 0, i32 5 -// GFX900-NEXT: [[TMP19:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: store i64 [[TMP19]], ptr addrspace(5) [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP20:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16:![0-9]+]] +// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18:![0-9]+]] +// GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 +// GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 +// GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1 +// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN]], align 4 +// GFX900-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 2 +// GFX900-NEXT: store ptr @__test_block_invoke, ptr [[BLOCK_INVOKE]], align 8 +// GFX900-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 3 +// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 4 +// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13]] +// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA13]] +// GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP_ASCAST]], ptr @__test_block_invoke_kernel, ptr [[BLOCK_ASCAST]]) +// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] +// GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0 +// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 +// GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1 +// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN5]], align 4 +// GFX900-NEXT: [[BLOCK_INVOKE6:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 2 +// GFX900-NEXT: store ptr @__test_block_invoke_2, ptr [[BLOCK_INVOKE6]], align 8 +// GFX900-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 3 +// GFX900-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 6 +// GFX900-NEXT: [[TMP8:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13]] +// GFX900-NEXT: store i8 [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8, !tbaa [[TBAA13]] +// GFX900-NEXT: [[BLOCK_CAPTURED9:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 4 +// GFX900-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP9]], ptr [[BLOCK_CAPTURED9]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[BLOCK_CAPTURED10:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 5 +// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP2_ASCAST]], ptr @__test_block_invoke_2_kernel, ptr [[BLOCK3_ASCAST]]) +// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] +// GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 +// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 +// GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1 +// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN14]], align 4 +// GFX900-NEXT: [[BLOCK_INVOKE15:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 2 +// GFX900-NEXT: store ptr @__test_block_invoke_3, ptr [[BLOCK_INVOKE15]], align 8 +// GFX900-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 3 +// GFX900-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP14]], ptr [[BLOCK_CAPTURED16]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[BLOCK_CAPTURED17:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 6 +// GFX900-NEXT: [[TMP15:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA13]] +// GFX900-NEXT: store i8 [[TMP15]], ptr [[BLOCK_CAPTURED17]], align 8, !tbaa [[TBAA13]] +// GFX900-NEXT: [[BLOCK_CAPTURED18:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 4 +// GFX900-NEXT: [[TMP16:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP16]], ptr [[BLOCK_CAPTURED18]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 +// GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]] -// GFX900-NEXT: [[TMP21:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 -// GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP21]], align 8 -// GFX900-NEXT: [[TMP22:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP14]], i32 [[TMP15]], ptr addrspace(5) [[VARTMP11]], ptr @__test_block_invoke_3_kernel, ptr [[TMP20]], i32 1, ptr addrspace(5) [[TMP21]]) +// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 +// GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8 +// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr [[TMP11_ASCAST]], ptr @__test_block_invoke_3_kernel, ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR8]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]] -// GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 0 -// GFX900-NEXT: store i32 32, ptr addrspace(5) [[BLOCK_SIZE22]], align 8 -// GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 1 -// GFX900-NEXT: store i32 8, ptr addrspace(5) [[BLOCK_ALIGN23]], align 4 -// GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 2 -// GFX900-NEXT: store ptr @__test_block_invoke_4, ptr addrspace(5) [[BLOCK_INVOKE24]], align 8 -// GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 3 -// GFX900-NEXT: [[TMP23:%.*]] = load i64, ptr addrspace(5) [[D_ADDR]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: store i64 [[TMP23]], ptr addrspace(5) [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr addrspace(5) [[BLOCK21]], i32 0, i32 4 -// GFX900-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[C_ADDR]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr addrspace(1) [[TMP24]], ptr addrspace(5) [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr -// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA13]] -// GFX900-NEXT: [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]] -// GFX900-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]] -// GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[VARTMP27]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] -// GFX900-NEXT: [[TMP27:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA13]] -// GFX900-NEXT: [[TMP28:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr -// GFX900-NEXT: [[TMP29:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP25]], i32 [[TMP26]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[VARTMP27]], ptr @__test_block_invoke_4_kernel, ptr [[TMP28]]) +// GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 +// GFX900-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8 +// GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1 +// GFX900-NEXT: store i32 8, ptr [[BLOCK_ALIGN23]], align 4 +// GFX900-NEXT: [[BLOCK_INVOKE24:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 2 +// GFX900-NEXT: store ptr @__test_block_invoke_4, ptr [[BLOCK_INVOKE24]], align 8 +// GFX900-NEXT: [[BLOCK_CAPTURED25:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 3 +// GFX900-NEXT: [[TMP20:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: store i64 [[TMP20]], ptr [[BLOCK_CAPTURED25]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 +// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA13]] +// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] +// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA13]] +// GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP27_ASCAST]], ptr @__test_block_invoke_4_kernel, ptr [[BLOCK21_ASCAST]]) // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR8]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] @@ -522,7 +566,8 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] { // GFX900-NEXT: entry: // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 @@ -548,7 +593,8 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // GFX900-NEXT: entry: // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 @@ -581,8 +627,10 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: entry: // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // GFX900-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) -// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 -// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr addrspace(5) [[LP_ADDR]], align 4, !tbaa [[TBAA7]] +// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr +// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 +// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA7]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA13]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 @@ -595,7 +643,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]] // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 // GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[LP_ADDR]], align 4, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA7]] // GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0 // GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA14]] // GFX900-NEXT: ret void @@ -617,7 +665,8 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // GFX900-NEXT: entry: // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 // GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 @@ -646,16 +695,21 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) // GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) // GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) -// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr +// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr +// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr +// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] -// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]] +// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA16]] -// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA14]] -// GFX900-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] -// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA14]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT18]] +// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP_ASCAST]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR8]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR8]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR8]] @@ -667,7 +721,8 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // GFX900-NEXT: entry: // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr +// GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() // GFX900-NEXT: ret void // diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl index 8a04456b5df04..53cca49e87ef4 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl @@ -139,12 +139,12 @@ void test_static_var_local(void) { // Test function-scope variable initialization. // NOOPT-LABEL: @test_func_scope_var_private( -// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp1, align 4 -// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp2, align 4 -// NOOPT: store ptr addrspace(5) null, ptr addrspace(5) %sp3, align 4 -// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp4, align 4 -// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false) -// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2, i8 0, i64 24, i1 false) +// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp1{{.*}}, align 4 +// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp2{{.*}}, align 4 +// NOOPT: store ptr addrspace(5) null, ptr %sp3{{.*}}, align 4 +// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp4{{.*}}, align 4 +// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false) +// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, i1 false) void test_func_scope_var_private(void) { private char *sp1 = 0; private char *sp2 = NULL; @@ -157,12 +157,12 @@ void test_func_scope_var_private(void) { // Test function-scope variable initialization. // NOOPT-LABEL: @test_func_scope_var_local( -// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp1, align 4 -// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp2, align 4 -// NOOPT: store ptr addrspace(3) null, ptr addrspace(5) %sp3, align 4 -// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp4, align 4 -// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false) -// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2, i8 0, i64 24, i1 false) +// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp1{{.*}}, align 4 +// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp2{{.*}}, align 4 +// NOOPT: store ptr addrspace(3) null, ptr %sp3{{.*}}, align 4 +// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp4{{.*}}, align 4 +// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false) +// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, i1 false) void test_func_scope_var_local(void) { local char *sp1 = 0; local char *sp2 = NULL; @@ -603,7 +603,7 @@ int test_and_ptr(private char* p1, local char* p2) { // Test folding of null pointer in function scope. // NOOPT-LABEL: test_fold_private // NOOPT: call void @test_fold_callee -// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob, align 8 +// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0 // NOOPT: call void @test_fold_callee // NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i32) to i64 @@ -619,7 +619,7 @@ void test_fold_private(void) { // NOOPT-LABEL: test_fold_local // NOOPT: call void @test_fold_callee -// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob, align 8 +// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0 // NOOPT: call void @test_fold_callee // NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i32) to i64 diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl index 5e2de38ac3d3e..2d76d179344f8 100644 --- a/clang/test/CodeGenOpenCL/atomic-ops.cl +++ b/clang/test/CodeGenOpenCL/atomic-ops.cl @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \ // RUN: | FileCheck %s @@ -35,309 +36,897 @@ typedef enum memory_scope { atomic_int j; +// CHECK-LABEL: define dso_local void @fi1( +// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP1]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP3]] syncscope("agent") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ATOMIC_TEMP1_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP8]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP9]] syncscope("wavefront") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP10]], ptr [[ATOMIC_TEMP3_ASCAST]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ATOMIC_TEMP3_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// void fi1(atomic_int *i) { - // CHECK-LABEL: @fi1 - // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); - // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4 x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); - // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} seq_cst, align 4 x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); - // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4 x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); } +// CHECK-LABEL: define dso_local void @fi2( +// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: ret void +// void fi2(atomic_int *i) { - // CHECK-LABEL: @fi2 - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); } +// CHECK-LABEL: define dso_local void @test_addr( +// CHECK-SAME: ptr addrspace(1) noundef [[IG:%.*]], ptr addrspace(5) noundef [[IP:%.*]], ptr addrspace(3) noundef [[IL:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IG_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[IP_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// CHECK-NEXT: [[IL_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[IG_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IG_ADDR]] to ptr +// CHECK-NEXT: [[IP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IP_ADDR]] to ptr +// CHECK-NEXT: [[IL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IL_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP2]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[IG]], ptr [[IG_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(5) [[IP]], ptr [[IP_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr addrspace(3) [[IL]], ptr [[IL_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IG_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[TMP0]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(5), ptr [[IP_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// CHECK-NEXT: store atomic i32 [[TMP3]], ptr addrspace(5) [[TMP2]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[IL_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP2_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTATOMICTMP2_ASCAST]], align 4 +// CHECK-NEXT: store atomic i32 [[TMP5]], ptr addrspace(3) [[TMP4]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: ret void +// void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) { - // CHECK-LABEL: @test_addr - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(1) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(5) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(3) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group); } +// CHECK-LABEL: define dso_local void @fi3( +// CHECK-SAME: ptr noundef [[I:%.*]], ptr noundef [[UI:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[UI_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// CHECK-NEXT: [[UI_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[UI_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[UI]], ptr [[UI_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw min ptr [[TMP4]], i32 [[TMP5]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP7]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = atomicrmw max ptr [[TMP8]], i32 [[TMP9]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[UI_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw umin ptr [[TMP12]], i32 [[TMP13]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP15]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[UI_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = atomicrmw umax ptr [[TMP16]], i32 [[TMP17]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP18]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP19]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// void fi3(atomic_int *i, atomic_uint *ui) { - // CHECK-LABEL: @fi3 - // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4 x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group); } +// CHECK-LABEL: define dso_local zeroext i1 @fi4( +// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// CHECK-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup-one-as") acquire acquire, align 4 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 +// CHECK-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] +// CHECK: [[CMPXCHG_STORE_EXPECTED]]: +// CHECK-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] +// CHECK: [[CMPXCHG_CONTINUE]]: +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 +// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK-NEXT: ret i1 [[LOADEDV]] +// bool fi4(atomic_int *i) { - // CHECK-LABEL: @fi4( - // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4 - // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 - // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 - // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] - // CHECK: store i32 [[OLD]] int cmp = 0; return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group); } +// CHECK-LABEL: define dso_local void @fi5( +// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// CHECK-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: switch i32 [[TMP1]], label %[[OPENCL_ALLSVMDEVICES:.*]] [ +// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]] +// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]] +// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]] +// CHECK-NEXT: ] +// CHECK: [[OPENCL_WORKGROUP]]: +// CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]] +// CHECK: [[OPENCL_DEVICE]]: +// CHECK-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// CHECK: [[OPENCL_ALLSVMDEVICES]]: +// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// CHECK: [[OPENCL_SUBGROUP]]: +// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// CHECK: [[ATOMIC_SCOPE_CONTINUE]]: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// void fi5(atomic_int *i, int scope) { - // CHECK-LABEL: @fi5 - // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [ - // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]] - // CHECK-NEXT: i32 2, label %[[opencl_device:.*]] - // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]] - // CHECK-NEXT: ] - // CHECK: [[opencl_workgroup]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4 - // CHECK: br label %[[continue:.*]] - // CHECK: [[opencl_device]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4 - // CHECK: br label %[[continue]] - // CHECK: [[opencl_allsvmdevices]]: - // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4 - // CHECK: br label %[[continue]] - // CHECK: [[opencl_subgroup]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4 - // CHECK: br label %[[continue]] - // CHECK: [[continue]]: int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); } +// CHECK-LABEL: define dso_local void @fi6( +// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[ORDER:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[ORDER_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// CHECK-NEXT: [[ORDER_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORDER_ADDR]] to ptr +// CHECK-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[ORDER]], ptr [[ORDER_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ORDER_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [ +// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]] +// CHECK-NEXT: i32 2, label %[[ACQUIRE]] +// CHECK-NEXT: i32 5, label %[[SEQCST:.*]] +// CHECK-NEXT: ] +// CHECK: [[MONOTONIC]]: +// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES:.*]] [ +// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]] +// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]] +// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]] +// CHECK-NEXT: ] +// CHECK: [[ACQUIRE]]: +// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES3:.*]] [ +// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP1:.*]] +// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE2:.*]] +// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP4:.*]] +// CHECK-NEXT: ] +// CHECK: [[SEQCST]]: +// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES8:.*]] [ +// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP6:.*]] +// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE7:.*]] +// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP9:.*]] +// CHECK-NEXT: ] +// CHECK: [[ATOMIC_CONTINUE:.*]]: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// CHECK: [[OPENCL_WORKGROUP]]: +// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]] +// CHECK: [[OPENCL_DEVICE]]: +// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") monotonic, align 4 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// CHECK: [[OPENCL_ALLSVMDEVICES]]: +// CHECK-NEXT: [[TMP6:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") monotonic, align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// CHECK: [[OPENCL_SUBGROUP]]: +// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") monotonic, align 4 +// CHECK-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] +// CHECK: [[ATOMIC_SCOPE_CONTINUE]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// CHECK: [[OPENCL_WORKGROUP1]]: +// CHECK-NEXT: [[TMP8:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") acquire, align 4 +// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5:.*]] +// CHECK: [[OPENCL_DEVICE2]]: +// CHECK-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") acquire, align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]] +// CHECK: [[OPENCL_ALLSVMDEVICES3]]: +// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") acquire, align 4 +// CHECK-NEXT: store i32 [[TMP10]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]] +// CHECK: [[OPENCL_SUBGROUP4]]: +// CHECK-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") acquire, align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]] +// CHECK: [[ATOMIC_SCOPE_CONTINUE5]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// CHECK: [[OPENCL_WORKGROUP6]]: +// CHECK-NEXT: [[TMP12:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10:.*]] +// CHECK: [[OPENCL_DEVICE7]]: +// CHECK-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP13]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]] +// CHECK: [[OPENCL_ALLSVMDEVICES8]]: +// CHECK-NEXT: [[TMP14:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]] +// CHECK: [[OPENCL_SUBGROUP9]]: +// CHECK-NEXT: [[TMP15:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP15]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]] +// CHECK: [[ATOMIC_SCOPE_CONTINUE10]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// void fi6(atomic_int *i, int order, int scope) { - // CHECK-LABEL: @fi6 - // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [ - // CHECK-NEXT: i32 1, label %[[acquire:.*]] - // CHECK-NEXT: i32 2, label %[[acquire:.*]] - // CHECK-NEXT: i32 5, label %[[seqcst:.*]] - // CHECK-NEXT: ] - // CHECK: [[monotonic]]: - // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [ - // CHECK-NEXT: i32 1, label %[[MON_WG:.*]] - // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]] - // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]] - // CHECK-NEXT: ] - // CHECK: [[acquire]]: - // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [ - // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]] - // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]] - // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]] - // CHECK-NEXT: ] - // CHECK: [[seqcst]]: - // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [ - // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]] - // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]] - // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] - // CHECK-NEXT: ] - // CHECK: [[MON_WG]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4 - // CHECK: [[MON_DEV]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4 - // CHECK: [[MON_ALL]]: - // CHECK: load atomic i32, ptr %{{.*}} monotonic, align 4 - // CHECK: [[MON_SUB]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4 - // CHECK: [[ACQ_WG]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") acquire, align 4 - // CHECK: [[ACQ_DEV]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") acquire, align 4 - // CHECK: [[ACQ_ALL]]: - // CHECK: load atomic i32, ptr %{{.*}} acquire, align 4 - // CHECK: [[ACQ_SUB]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") acquire, align 4 - // CHECK: [[SEQ_WG]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4 - // CHECK: [[SEQ_DEV]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4 - // CHECK: [[SEQ_ALL]]: - // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4 - // CHECK: [[SEQ_SUB]]: - // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4 int x = __opencl_atomic_load(i, order, scope); } +// CHECK-LABEL: define dso_local float @ff1( +// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: ret float [[TMP2]] +// float ff1(global atomic_float *d) { - // CHECK-LABEL: @ff1 - // CHECK: load atomic i32, ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4 return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group); } +// CHECK-LABEL: define dso_local void @ff2( +// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup-one-as") release, align 4 +// CHECK-NEXT: ret void +// void ff2(atomic_float *d) { - // CHECK-LABEL: @ff2 - // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4 __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group); } +// CHECK-LABEL: define dso_local float @ff3( +// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: ret float [[TMP3]] +// float ff3(atomic_float *d) { - // CHECK-LABEL: @ff3 - // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4 return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); } +// CHECK-LABEL: define dso_local float @ff4( +// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], float noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store float [[A]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], float [[TMP2]] syncscope("workgroup-one-as") monotonic, align 4 +// CHECK-NEXT: store float [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: ret float [[TMP4]] +// float ff4(global atomic_float *d, float a) { - // CHECK-LABEL: @ff4 - // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); } +// CHECK-LABEL: define dso_local float @ff5( +// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], double noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store double [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[D_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store double [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTATOMICTMP_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], double [[TMP2]] syncscope("workgroup-one-as") monotonic, align 8 +// CHECK-NEXT: store double [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ATOMIC_TEMP_ASCAST]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP4]] to float +// CHECK-NEXT: ret float [[CONV]] +// float ff5(global atomic_double *d, double a) { - // CHECK-LABEL: @ff5 - // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); } -// CHECK-LABEL: @atomic_init_foo +// CHECK-LABEL: define dso_local void @atomic_init_foo( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: store i32 42, ptr addrspace(1) @j, align 4 +// CHECK-NEXT: ret void +// void atomic_init_foo() { - // CHECK-NOT: atomic - // CHECK: store __opencl_atomic_init(&j, 42); - // CHECK-NOT: atomic - // CHECK: } } -// CHECK-LABEL: @failureOrder +// CHECK-LABEL: define dso_local void @failureOrder( +// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr +// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr +// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 43, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] syncscope("workgroup-one-as") acquire monotonic, align 4 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +// CHECK-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] +// CHECK: [[CMPXCHG_STORE_EXPECTED]]: +// CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] +// CHECK: [[CMPXCHG_CONTINUE]]: +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 +// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 43, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = cmpxchg weak ptr [[TMP8]], i32 [[TMP10]], i32 [[TMP11]] syncscope("workgroup") seq_cst acquire, align 4 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +// CHECK-NEXT: br i1 [[TMP14]], label %[[CMPXCHG_CONTINUE4:.*]], label %[[CMPXCHG_STORE_EXPECTED3:.*]] +// CHECK: [[CMPXCHG_STORE_EXPECTED3]]: +// CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP9]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE4]] +// CHECK: [[CMPXCHG_CONTINUE4]]: +// CHECK-NEXT: [[STOREDV5:%.*]] = zext i1 [[TMP14]] to i8 +// CHECK-NEXT: store i8 [[STOREDV5]], ptr [[CMPXCHG_BOOL2_ASCAST]], align 1 +// CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1 +// CHECK-NEXT: [[LOADEDV6:%.*]] = trunc i8 [[TMP15]] to i1 +// CHECK-NEXT: ret void +// void failureOrder(atomic_int *ptr, int *ptr2) { - // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4 __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); - // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4 __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group); } -// CHECK-LABEL: @generalFailureOrder +// CHECK-LABEL: define dso_local void @generalFailureOrder( +// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i32 noundef [[SUCCESS:%.*]], i32 noundef [[FAIL:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[SUCCESS_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[FAIL_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr +// CHECK-NEXT: [[SUCCESS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUCCESS_ADDR]] to ptr +// CHECK-NEXT: [[FAIL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FAIL_ADDR]] to ptr +// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr +// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr +// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SUCCESS]], ptr [[SUCCESS_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[FAIL]], ptr [[FAIL_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SUCCESS_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 42, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[FAIL_ADDR_ASCAST]], align 4 +// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [ +// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]] +// CHECK-NEXT: i32 2, label %[[ACQUIRE]] +// CHECK-NEXT: i32 3, label %[[RELEASE:.*]] +// CHECK-NEXT: i32 4, label %[[ACQREL:.*]] +// CHECK-NEXT: i32 5, label %[[SEQCST:.*]] +// CHECK-NEXT: ] +// CHECK: [[MONOTONIC]]: +// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL:.*]] [ +// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL:.*]] +// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL]] +// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL:.*]] +// CHECK-NEXT: ] +// CHECK: [[ACQUIRE]]: +// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL8:.*]] [ +// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL9:.*]] +// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL9]] +// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL10:.*]] +// CHECK-NEXT: ] +// CHECK: [[RELEASE]]: +// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL21:.*]] [ +// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL22:.*]] +// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL22]] +// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL23:.*]] +// CHECK-NEXT: ] +// CHECK: [[ACQREL]]: +// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL34:.*]] [ +// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL35:.*]] +// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL35]] +// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL36:.*]] +// CHECK-NEXT: ] +// CHECK: [[SEQCST]]: +// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL47:.*]] [ +// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL48:.*]] +// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL48]] +// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL49:.*]] +// CHECK-NEXT: ] +// CHECK: [[ATOMIC_CONTINUE:.*]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK-NEXT: ret void +// CHECK: [[MONOTONIC_FAIL]]: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP6]] syncscope("workgroup-one-as") monotonic monotonic, align 4 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { i32, i1 } [[TMP7]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +// CHECK-NEXT: br i1 [[TMP9]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] +// CHECK: [[ACQUIRE_FAIL]]: +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP10]], i32 [[TMP11]] syncscope("workgroup-one-as") monotonic acquire, align 4 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +// CHECK-NEXT: br i1 [[TMP14]], label %[[CMPXCHG_CONTINUE3:.*]], label %[[CMPXCHG_STORE_EXPECTED2:.*]] +// CHECK: [[SEQCST_FAIL]]: +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP15]], i32 [[TMP16]] syncscope("workgroup-one-as") monotonic seq_cst, align 4 +// CHECK-NEXT: [[TMP18:%.*]] = extractvalue { i32, i1 } [[TMP17]], 0 +// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i1 } [[TMP17]], 1 +// CHECK-NEXT: br i1 [[TMP19]], label %[[CMPXCHG_CONTINUE6:.*]], label %[[CMPXCHG_STORE_EXPECTED5:.*]] +// CHECK: [[ATOMIC_CONTINUE1:.*]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// CHECK: [[CMPXCHG_STORE_EXPECTED]]: +// CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] +// CHECK: [[CMPXCHG_CONTINUE]]: +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP9]] to i8 +// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]] +// CHECK: [[CMPXCHG_STORE_EXPECTED2]]: +// CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE3]] +// CHECK: [[CMPXCHG_CONTINUE3]]: +// CHECK-NEXT: [[STOREDV4:%.*]] = zext i1 [[TMP14]] to i8 +// CHECK-NEXT: store i8 [[STOREDV4]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]] +// CHECK: [[CMPXCHG_STORE_EXPECTED5]]: +// CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE6]] +// CHECK: [[CMPXCHG_CONTINUE6]]: +// CHECK-NEXT: [[STOREDV7:%.*]] = zext i1 [[TMP19]] to i8 +// CHECK-NEXT: store i8 [[STOREDV7]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]] +// CHECK: [[MONOTONIC_FAIL8]]: +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP20]], i32 [[TMP21]] syncscope("workgroup-one-as") acquire monotonic, align 4 +// CHECK-NEXT: [[TMP23:%.*]] = extractvalue { i32, i1 } [[TMP22]], 0 +// CHECK-NEXT: [[TMP24:%.*]] = extractvalue { i32, i1 } [[TMP22]], 1 +// CHECK-NEXT: br i1 [[TMP24]], label %[[CMPXCHG_CONTINUE13:.*]], label %[[CMPXCHG_STORE_EXPECTED12:.*]] +// CHECK: [[ACQUIRE_FAIL9]]: +// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP25]], i32 [[TMP26]] syncscope("workgroup-one-as") acquire acquire, align 4 +// CHECK-NEXT: [[TMP28:%.*]] = extractvalue { i32, i1 } [[TMP27]], 0 +// CHECK-NEXT: [[TMP29:%.*]] = extractvalue { i32, i1 } [[TMP27]], 1 +// CHECK-NEXT: br i1 [[TMP29]], label %[[CMPXCHG_CONTINUE16:.*]], label %[[CMPXCHG_STORE_EXPECTED15:.*]] +// CHECK: [[SEQCST_FAIL10]]: +// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP32:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP30]], i32 [[TMP31]] syncscope("workgroup-one-as") acquire seq_cst, align 4 +// CHECK-NEXT: [[TMP33:%.*]] = extractvalue { i32, i1 } [[TMP32]], 0 +// CHECK-NEXT: [[TMP34:%.*]] = extractvalue { i32, i1 } [[TMP32]], 1 +// CHECK-NEXT: br i1 [[TMP34]], label %[[CMPXCHG_CONTINUE19:.*]], label %[[CMPXCHG_STORE_EXPECTED18:.*]] +// CHECK: [[ATOMIC_CONTINUE11:.*]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// CHECK: [[CMPXCHG_STORE_EXPECTED12]]: +// CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE13]] +// CHECK: [[CMPXCHG_CONTINUE13]]: +// CHECK-NEXT: [[STOREDV14:%.*]] = zext i1 [[TMP24]] to i8 +// CHECK-NEXT: store i8 [[STOREDV14]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE11]] +// CHECK: [[CMPXCHG_STORE_EXPECTED15]]: +// CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE16]] +// CHECK: [[CMPXCHG_CONTINUE16]]: +// CHECK-NEXT: [[STOREDV17:%.*]] = zext i1 [[TMP29]] to i8 +// CHECK-NEXT: store i8 [[STOREDV17]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE11]] +// CHECK: [[CMPXCHG_STORE_EXPECTED18]]: +// CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE19]] +// CHECK: [[CMPXCHG_CONTINUE19]]: +// CHECK-NEXT: [[STOREDV20:%.*]] = zext i1 [[TMP34]] to i8 +// CHECK-NEXT: store i8 [[STOREDV20]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE11]] +// CHECK: [[MONOTONIC_FAIL21]]: +// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP37:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP35]], i32 [[TMP36]] syncscope("workgroup-one-as") release monotonic, align 4 +// CHECK-NEXT: [[TMP38:%.*]] = extractvalue { i32, i1 } [[TMP37]], 0 +// CHECK-NEXT: [[TMP39:%.*]] = extractvalue { i32, i1 } [[TMP37]], 1 +// CHECK-NEXT: br i1 [[TMP39]], label %[[CMPXCHG_CONTINUE26:.*]], label %[[CMPXCHG_STORE_EXPECTED25:.*]] +// CHECK: [[ACQUIRE_FAIL22]]: +// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP42:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP40]], i32 [[TMP41]] syncscope("workgroup-one-as") release acquire, align 4 +// CHECK-NEXT: [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP42]], 0 +// CHECK-NEXT: [[TMP44:%.*]] = extractvalue { i32, i1 } [[TMP42]], 1 +// CHECK-NEXT: br i1 [[TMP44]], label %[[CMPXCHG_CONTINUE29:.*]], label %[[CMPXCHG_STORE_EXPECTED28:.*]] +// CHECK: [[SEQCST_FAIL23]]: +// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP47:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP45]], i32 [[TMP46]] syncscope("workgroup-one-as") release seq_cst, align 4 +// CHECK-NEXT: [[TMP48:%.*]] = extractvalue { i32, i1 } [[TMP47]], 0 +// CHECK-NEXT: [[TMP49:%.*]] = extractvalue { i32, i1 } [[TMP47]], 1 +// CHECK-NEXT: br i1 [[TMP49]], label %[[CMPXCHG_CONTINUE32:.*]], label %[[CMPXCHG_STORE_EXPECTED31:.*]] +// CHECK: [[ATOMIC_CONTINUE24:.*]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// CHECK: [[CMPXCHG_STORE_EXPECTED25]]: +// CHECK-NEXT: store i32 [[TMP38]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE26]] +// CHECK: [[CMPXCHG_CONTINUE26]]: +// CHECK-NEXT: [[STOREDV27:%.*]] = zext i1 [[TMP39]] to i8 +// CHECK-NEXT: store i8 [[STOREDV27]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE24]] +// CHECK: [[CMPXCHG_STORE_EXPECTED28]]: +// CHECK-NEXT: store i32 [[TMP43]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE29]] +// CHECK: [[CMPXCHG_CONTINUE29]]: +// CHECK-NEXT: [[STOREDV30:%.*]] = zext i1 [[TMP44]] to i8 +// CHECK-NEXT: store i8 [[STOREDV30]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE24]] +// CHECK: [[CMPXCHG_STORE_EXPECTED31]]: +// CHECK-NEXT: store i32 [[TMP48]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE32]] +// CHECK: [[CMPXCHG_CONTINUE32]]: +// CHECK-NEXT: [[STOREDV33:%.*]] = zext i1 [[TMP49]] to i8 +// CHECK-NEXT: store i8 [[STOREDV33]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE24]] +// CHECK: [[MONOTONIC_FAIL34]]: +// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP52:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP50]], i32 [[TMP51]] syncscope("workgroup-one-as") acq_rel monotonic, align 4 +// CHECK-NEXT: [[TMP53:%.*]] = extractvalue { i32, i1 } [[TMP52]], 0 +// CHECK-NEXT: [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1 +// CHECK-NEXT: br i1 [[TMP54]], label %[[CMPXCHG_CONTINUE39:.*]], label %[[CMPXCHG_STORE_EXPECTED38:.*]] +// CHECK: [[ACQUIRE_FAIL35]]: +// CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP57:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP55]], i32 [[TMP56]] syncscope("workgroup-one-as") acq_rel acquire, align 4 +// CHECK-NEXT: [[TMP58:%.*]] = extractvalue { i32, i1 } [[TMP57]], 0 +// CHECK-NEXT: [[TMP59:%.*]] = extractvalue { i32, i1 } [[TMP57]], 1 +// CHECK-NEXT: br i1 [[TMP59]], label %[[CMPXCHG_CONTINUE42:.*]], label %[[CMPXCHG_STORE_EXPECTED41:.*]] +// CHECK: [[SEQCST_FAIL36]]: +// CHECK-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP62:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP60]], i32 [[TMP61]] syncscope("workgroup-one-as") acq_rel seq_cst, align 4 +// CHECK-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP62]], 0 +// CHECK-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1 +// CHECK-NEXT: br i1 [[TMP64]], label %[[CMPXCHG_CONTINUE45:.*]], label %[[CMPXCHG_STORE_EXPECTED44:.*]] +// CHECK: [[ATOMIC_CONTINUE37:.*]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// CHECK: [[CMPXCHG_STORE_EXPECTED38]]: +// CHECK-NEXT: store i32 [[TMP53]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE39]] +// CHECK: [[CMPXCHG_CONTINUE39]]: +// CHECK-NEXT: [[STOREDV40:%.*]] = zext i1 [[TMP54]] to i8 +// CHECK-NEXT: store i8 [[STOREDV40]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE37]] +// CHECK: [[CMPXCHG_STORE_EXPECTED41]]: +// CHECK-NEXT: store i32 [[TMP58]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE42]] +// CHECK: [[CMPXCHG_CONTINUE42]]: +// CHECK-NEXT: [[STOREDV43:%.*]] = zext i1 [[TMP59]] to i8 +// CHECK-NEXT: store i8 [[STOREDV43]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE37]] +// CHECK: [[CMPXCHG_STORE_EXPECTED44]]: +// CHECK-NEXT: store i32 [[TMP63]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE45]] +// CHECK: [[CMPXCHG_CONTINUE45]]: +// CHECK-NEXT: [[STOREDV46:%.*]] = zext i1 [[TMP64]] to i8 +// CHECK-NEXT: store i8 [[STOREDV46]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE37]] +// CHECK: [[MONOTONIC_FAIL47]]: +// CHECK-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP67:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP65]], i32 [[TMP66]] syncscope("workgroup") seq_cst monotonic, align 4 +// CHECK-NEXT: [[TMP68:%.*]] = extractvalue { i32, i1 } [[TMP67]], 0 +// CHECK-NEXT: [[TMP69:%.*]] = extractvalue { i32, i1 } [[TMP67]], 1 +// CHECK-NEXT: br i1 [[TMP69]], label %[[CMPXCHG_CONTINUE52:.*]], label %[[CMPXCHG_STORE_EXPECTED51:.*]] +// CHECK: [[ACQUIRE_FAIL48]]: +// CHECK-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP72:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP70]], i32 [[TMP71]] syncscope("workgroup") seq_cst acquire, align 4 +// CHECK-NEXT: [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP72]], 0 +// CHECK-NEXT: [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1 +// CHECK-NEXT: br i1 [[TMP74]], label %[[CMPXCHG_CONTINUE55:.*]], label %[[CMPXCHG_STORE_EXPECTED54:.*]] +// CHECK: [[SEQCST_FAIL49]]: +// CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP77:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP75]], i32 [[TMP76]] syncscope("workgroup") seq_cst seq_cst, align 4 +// CHECK-NEXT: [[TMP78:%.*]] = extractvalue { i32, i1 } [[TMP77]], 0 +// CHECK-NEXT: [[TMP79:%.*]] = extractvalue { i32, i1 } [[TMP77]], 1 +// CHECK-NEXT: br i1 [[TMP79]], label %[[CMPXCHG_CONTINUE58:.*]], label %[[CMPXCHG_STORE_EXPECTED57:.*]] +// CHECK: [[ATOMIC_CONTINUE50:.*]]: +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] +// CHECK: [[CMPXCHG_STORE_EXPECTED51]]: +// CHECK-NEXT: store i32 [[TMP68]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE52]] +// CHECK: [[CMPXCHG_CONTINUE52]]: +// CHECK-NEXT: [[STOREDV53:%.*]] = zext i1 [[TMP69]] to i8 +// CHECK-NEXT: store i8 [[STOREDV53]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE50]] +// CHECK: [[CMPXCHG_STORE_EXPECTED54]]: +// CHECK-NEXT: store i32 [[TMP73]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE55]] +// CHECK: [[CMPXCHG_CONTINUE55]]: +// CHECK-NEXT: [[STOREDV56:%.*]] = zext i1 [[TMP74]] to i8 +// CHECK-NEXT: store i8 [[STOREDV56]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE50]] +// CHECK: [[CMPXCHG_STORE_EXPECTED57]]: +// CHECK-NEXT: store i32 [[TMP78]], ptr [[TMP2]], align 4 +// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE58]] +// CHECK: [[CMPXCHG_CONTINUE58]]: +// CHECK-NEXT: [[STOREDV59:%.*]] = zext i1 [[TMP79]] to i8 +// CHECK-NEXT: store i8 [[STOREDV59]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 +// CHECK-NEXT: br label %[[ATOMIC_CONTINUE50]] +// void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) { __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group); -// CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [ - // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 2, label %[[ACQUIRE]] - // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]] - - // CHECK: [[MONOTONIC]] - // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [ - // CHECK-NEXT: i32 1, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 2, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 5, label %[[MONOTONIC_SEQCST:[0-9a-zA-Z._]+]] - // CHECK-NEXT: ] - - // CHECK: [[ACQUIRE]] - // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [ - // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 5, label %[[ACQUIRE_SEQCST:[0-9a-zA-Z._]+]] - // CHECK-NEXT: ] - - // CHECK: [[RELEASE]] - // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [ - // CHECK-NEXT: i32 1, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 5, label %[[RELEASE_SEQCST:[0-9a-zA-Z._]+]] - // CHECK-NEXT: ] - - // CHECK: [[ACQREL]] - // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [ - // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 5, label %[[ACQREL_SEQCST:[0-9a-zA-Z._]+]] - // CHECK-NEXT: ] - - // CHECK: [[SEQCST]] - // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [ - // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]] - // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]] - // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]] - // CHECK-NEXT: ] - - // CHECK: [[MONOTONIC_MONOTONIC]] - // CHECK: cmpxchg {{.*}} monotonic monotonic, align 4 - // CHECK: br - - // CHECK: [[MONOTONIC_ACQUIRE]] - // CHECK: cmpxchg {{.*}} monotonic acquire, align 4 - // CHECK: br - - // CHECK: [[MONOTONIC_SEQCST]] - // CHECK: cmpxchg {{.*}} monotonic seq_cst, align 4 - // CHECK: br - - // CHECK: [[ACQUIRE_MONOTONIC]] - // CHECK: cmpxchg {{.*}} acquire monotonic, align 4 - // CHECK: br - - // CHECK: [[ACQUIRE_ACQUIRE]] - // CHECK: cmpxchg {{.*}} acquire acquire, align 4 - // CHECK: br - - // CHECK: [[ACQUIRE_SEQCST]] - // CHECK: cmpxchg {{.*}} acquire seq_cst, align 4 - // CHECK: br - - // CHECK: [[RELEASE_MONOTONIC]] - // CHECK: cmpxchg {{.*}} release monotonic, align 4 - // CHECK: br - - // CHECK: [[RELEASE_ACQUIRE]] - // CHECK: cmpxchg {{.*}} release acquire, align 4 - // CHECK: br - - // CHECK: [[RELEASE_SEQCST]] - // CHECK: cmpxchg {{.*}} release seq_cst, align 4 - // CHECK: br - - // CHECK: [[ACQREL_MONOTONIC]] - // CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4 - // CHECK: br - - // CHECK: [[ACQREL_ACQUIRE]] - // CHECK: cmpxchg {{.*}} acq_rel acquire, align 4 - // CHECK: br - - // CHECK: [[ACQREL_SEQCST]] - // CHECK: cmpxchg {{.*}} acq_rel seq_cst, align 4 - // CHECK: br - - // CHECK: [[SEQCST_MONOTONIC]] - // CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4 - // CHECK: br - - // CHECK: [[SEQCST_ACQUIRE]] - // CHECK: cmpxchg {{.*}} seq_cst acquire, align 4 - // CHECK: br - - // CHECK: [[SEQCST_SEQCST]] - // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4 - // CHECK: br + + + + + + + + + + + + + + + + + + + + } +// CHECK-LABEL: define dso_local i32 @test_volatile( +// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load atomic volatile i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 +// CHECK-NEXT: ret i32 [[TMP2]] +// int test_volatile(volatile atomic_int *i) { - // CHECK-LABEL: @test_volatile - // CHECK: %[[i_addr:.*]] = alloca ptr - // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32 - // CHECK-NEXT: store ptr %i, ptr addrspace(5) %[[i_addr]] - // CHECK-NEXT: %[[addr:.*]] = load ptr, ptr addrspace(5) %[[i_addr]] - // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, ptr %[[addr]] syncscope("workgroup") seq_cst, align 4 - // CHECK-NEXT: store i32 %[[res]], ptr addrspace(5) %[[atomicdst]] - // CHECK-NEXT: %[[retval:.*]] = load i32, ptr addrspace(5) %[[atomicdst]] - // CHECK-NEXT: ret i32 %[[retval]] return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); } diff --git a/clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl b/clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl index 7d684bc185a58..0dafb44f12a3c 100644 --- a/clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/atomics-unsafe-hw-remarks-gfx90a.cl @@ -31,9 +31,9 @@ typedef enum memory_scope { // GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request. [-Rpass=si-lower] // GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request. [-Rpass=si-lower] -// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc -// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc -// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc +// GFX90A-HW-REMARK: global_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc +// GFX90A-HW-REMARK: global_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc +// GFX90A-HW-REMARK: global_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc // GFX90A-HW-LABEL: @atomic_unsafe_hw // GFX90A-HW: atomicrmw fadd ptr addrspace(1) %{{.*}}, float %{{.*}} syncscope("workgroup-one-as") monotonic, align 4 // GFX90A-HW: atomicrmw fadd ptr addrspace(1) %{{.*}}, float %{{.*}} syncscope("agent-one-as") monotonic, align 4 diff --git a/clang/test/CodeGenOpenCL/blocks.cl b/clang/test/CodeGenOpenCL/blocks.cl index e04722f657cfa..161f1406c96cb 100644 --- a/clang/test/CodeGenOpenCL/blocks.cl +++ b/clang/test/CodeGenOpenCL/blocks.cl @@ -25,13 +25,13 @@ void foo(){ // COMMON-NOT: %block.reserved // COMMON-NOT: %block.descriptor // SPIR: %[[block_size:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %block, i32 0, i32 0 - // AMDGCN: %[[block_size:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr addrspace(5) %block, i32 0, i32 0 + // AMDGCN: %[[block_size:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %block{{.*}}, i32 0, i32 0 // SPIR: store i32 16, ptr %[[block_size]] - // AMDGCN: store i32 20, ptr addrspace(5) %[[block_size]] + // AMDGCN: store i32 20, ptr %[[block_size]] // SPIR: %[[block_align:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %block, i32 0, i32 1 - // AMDGCN: %[[block_align:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr addrspace(5) %block, i32 0, i32 1 + // AMDGCN: %[[block_align:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %block{{.*}}, i32 0, i32 1 // SPIR: store i32 4, ptr %[[block_align]] - // AMDGCN: store i32 8, ptr addrspace(5) %[[block_align]] + // AMDGCN: store i32 8, ptr %[[block_align]] // SPIR: %[[block_invoke:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %[[block:.*]], i32 0, i32 2 // SPIR: store ptr addrspace(4) addrspacecast (ptr @__foo_block_invoke to ptr addrspace(4)), ptr %[[block_invoke]] // SPIR: %[[block_captured:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), i32 }>, ptr %[[block]], i32 0, i32 3 @@ -41,14 +41,13 @@ void foo(){ // SPIR: store ptr addrspace(4) %[[blk_gen_ptr]], ptr %[[block_B:.*]], // SPIR: %[[block_literal:.*]] = load ptr addrspace(4), ptr %[[block_B]] // SPIR: call {{.*}}i32 @__foo_block_invoke(ptr addrspace(4) noundef %[[block_literal]]) - // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr addrspace(5) %[[block:.*]], i32 0, i32 2 - // AMDGCN: store ptr @__foo_block_invoke, ptr addrspace(5) %[[block_invoke]] - // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr addrspace(5) %[[block]], i32 0, i32 3 - // AMDGCN: %[[i_value:.*]] = load i32, ptr addrspace(5) %i - // AMDGCN: store i32 %[[i_value]], ptr addrspace(5) %[[block_captured]], - // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast ptr addrspace(5) %[[block]] to ptr - // AMDGCN: store ptr %[[blk_gen_ptr]], ptr addrspace(5) %[[block_B:.*]], - // AMDGCN: %[[block_literal:.*]] = load ptr, ptr addrspace(5) %[[block_B]] + // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %[[block:.*]], i32 0, i32 2 + // AMDGCN: store ptr @__foo_block_invoke, ptr %[[block_invoke]] + // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %[[block]], i32 0, i32 3 + // AMDGCN: %[[i_value:.*]] = load i32, ptr %i + // AMDGCN: store i32 %[[i_value]], ptr %[[block_captured]], + // AMDGCN: store ptr %[[block]], ptr %[[block_B:.*]], + // AMDGCN: %[[block_literal:.*]] = load ptr, ptr %[[block_B]] // AMDGCN: call {{.*}}i32 @__foo_block_invoke(ptr noundef %[[block_literal]]) int (^ block_B)(void) = ^{ diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl index 474e95e74e006..85b449e45a0f1 100644 --- a/clang/test/CodeGenOpenCL/builtins-alloca.cl +++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL12 %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL20 %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL30 %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL30GAS %s // OPENCL-LABEL: define dso_local void @test1_builtin_alloca( // OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { @@ -20,6 +20,61 @@ // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test1_builtin_alloca( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test1_builtin_alloca( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test1_builtin_alloca( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca(unsigned n) { __private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int)); @@ -37,6 +92,61 @@ void test1_builtin_alloca(unsigned n) { // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test1_builtin_alloca_uninitialized( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test1_builtin_alloca_uninitialized( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_uninitialized( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test1_builtin_alloca_uninitialized( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca_uninitialized(unsigned n) { __private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int)); @@ -54,6 +164,61 @@ void test1_builtin_alloca_uninitialized(unsigned n) { // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test1_builtin_alloca_with_align( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test1_builtin_alloca_with_align( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_with_align( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test1_builtin_alloca_with_align( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca_with_align(unsigned n) { __private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8); @@ -71,6 +236,61 @@ void test1_builtin_alloca_with_align(unsigned n) { // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca_with_align_uninitialized(unsigned n) { __private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8); @@ -87,6 +307,57 @@ void test1_builtin_alloca_with_align_uninitialized(unsigned n) { // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test2_builtin_alloca( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test2_builtin_alloca( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test2_builtin_alloca( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca(unsigned n) { __private void *alloc_ptr = __builtin_alloca(n); @@ -103,6 +374,57 @@ void test2_builtin_alloca(unsigned n) { // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test2_builtin_alloca_uninitialized( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test2_builtin_alloca_uninitialized( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_uninitialized( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test2_builtin_alloca_uninitialized( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca_uninitialized(unsigned n) { __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n); @@ -119,6 +441,57 @@ void test2_builtin_alloca_uninitialized(unsigned n) { // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test2_builtin_alloca_with_align( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test2_builtin_alloca_with_align( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_with_align( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test2_builtin_alloca_with_align( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca_with_align(unsigned n) { __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8); @@ -135,6 +508,57 @@ void test2_builtin_alloca_with_align(unsigned n) { // OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) // OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL-NEXT: ret void +// OPENCL12-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr +// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30GAS-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized( +// OPENCL30GAS-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL30GAS-NEXT: [[ENTRY:.*:]] +// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr +// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca_with_align_uninitialized(unsigned n) { __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl index 3d74667b62b8c..9bfedac003296 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl @@ -8,8 +8,9 @@ typedef unsigned int uint; // CHECK-LABEL: @test_s_sleep_var( // CHECK-NEXT: entry: // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[D:%.*]], ptr addrspace(5) [[D_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[D_ADDR]], align 4 +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 [[TMP0]]) // CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 15) // CHECK-NEXT: ret void @@ -26,15 +27,19 @@ void test_s_sleep_var(int d) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4 -// CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4 +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false) -// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 // CHECK-NEXT: ret void // @@ -48,15 +53,19 @@ void test_permlane16_var(global uint* out, uint a, uint b, uint c) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4 -// CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4 +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlanex16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false) -// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 // CHECK-NEXT: ret void // @@ -79,8 +88,9 @@ void test_s_barrier_signal() // CHECK-LABEL: @test_s_barrier_signal_var( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(i32 [[TMP0]]) // CHECK-NEXT: ret void // @@ -94,18 +104,21 @@ void test_s_barrier_signal_var(int a) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: store ptr [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 8 +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 1) // CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspace(5) [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[IF_END:%.*]] // CHECK: if.else: -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[C_ADDR]], align 8 -// CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[IF_END]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1) @@ -127,20 +140,24 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store ptr [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 8 -// CHECK-NEXT: store i32 [[D:%.*]], ptr addrspace(5) [[D_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[D_ADDR]], align 4 +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32 [[TMP0]]) // CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[IF_END:%.*]] // CHECK: if.else: -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr addrspace(5) [[C_ADDR]], align 8 -// CHECK-NEXT: store ptr [[TMP3]], ptr addrspace(5) [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP3]], ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[IF_END]] // CHECK: if.end: // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1) @@ -160,8 +177,9 @@ void test_s_barrier_isfirst_var(int* a, int* b, int *c, int d) // CHECK-LABEL: @test_s_barrier_init( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(i32 1, i32 [[TMP0]]) // CHECK-NEXT: ret void // @@ -195,18 +213,21 @@ void test_s_wakeup_barrier() // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: store ptr [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 8 +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.leave() // CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] // CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr addrspace(5) [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[IF_END:%.*]] // CHECK: if.else: -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[C_ADDR]], align 8 -// CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[IF_END]] // CHECK: if.end: // CHECK-NEXT: ret void @@ -221,13 +242,17 @@ void test_s_barrier_leave(int* a, int* b, int *c) // CHECK-LABEL: @test_s_get_barrier_state( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 [[TMP0]]) -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[STATE]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[STATE_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4 // CHECK-NEXT: ret i32 [[TMP2]] // unsigned test_s_get_barrier_state(int a) @@ -262,16 +287,20 @@ void test_s_ttracedata_imm() // CHECK-NEXT: [[GP_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[CP_ADDR:%.*]] = alloca ptr addrspace(4), align 8, addrspace(5) // CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store ptr [[FP:%.*]], ptr addrspace(5) [[FP_ADDR]], align 8 -// CHECK-NEXT: store ptr addrspace(1) [[GP:%.*]], ptr addrspace(5) [[GP_ADDR]], align 8 -// CHECK-NEXT: store ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[CP_ADDR]], align 8 -// CHECK-NEXT: store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8 +// CHECK-NEXT: [[FP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FP_ADDR]] to ptr +// CHECK-NEXT: [[GP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GP_ADDR]] to ptr +// CHECK-NEXT: [[CP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CP_ADDR]] to ptr +// CHECK-NEXT: [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr +// CHECK-NEXT: store ptr [[FP:%.*]], ptr [[FP_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(1) [[GP:%.*]], ptr [[GP_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(4) [[CP:%.*]], ptr [[CP_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR_ASCAST]], align 8 // CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p0(ptr [[TMP0]], i32 0) -// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GP_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GP_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p1(ptr addrspace(1) [[TMP1]], i32 [[TMP2]]) -// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[CP_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr [[CP_ADDR_ASCAST]], align 8 // CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p4(ptr addrspace(4) [[TMP3]], i32 31) // CHECK-NEXT: ret void // @@ -286,12 +315,14 @@ void test_s_prefetch_data(int *fp, global float *gp, constant char *cp, unsigned // CHECK-NEXT: entry: // CHECK-NEXT: [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5) // CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(5) [[RSRC_ADDR]], align 16 -// CHECK-NEXT: store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4 +// CHECK-NEXT: [[RSRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RSRC_ADDR]] to ptr +// CHECK-NEXT: [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(8) [[RSRC:%.*]], ptr [[RSRC_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP0]], i32 128, i32 [[TMP1]]) -// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16 // CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP2]], i32 0, i32 31) // CHECK-NEXT: ret void // diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl index fc5649d8a41f7..a2f14c652c828 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl @@ -10,10 +10,12 @@ typedef unsigned char u8; // CHECK-NEXT: entry: // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) -// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8 -// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4 +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: [[DST_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr [[DST_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[SRC_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[DST_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 4, i32 0, i32 0) // CHECK-NEXT: ret void // @@ -25,10 +27,12 @@ void test_global_load_lds_u32(global u32* src, local u32 *dst) { // CHECK-NEXT: entry: // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) -// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8 -// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4 +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: [[DST_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr [[DST_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[SRC_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[DST_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 2, i32 0, i32 0) // CHECK-NEXT: ret void // @@ -40,10 +44,12 @@ void test_global_load_lds_u16(global u16* src, local u16 *dst) { // CHECK-NEXT: entry: // CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) -// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8 -// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4 +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: [[DST_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr [[DST_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[SRC_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr [[DST_ADDR_ASCAST]], align 4 // CHECK-NEXT: call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 1, i32 0, i32 0) // CHECK-NEXT: ret void // diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl index e8b6eb57c38d7..461abc3708128 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl @@ -71,7 +71,7 @@ short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) { // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX12-LABEL: test_global_add_half2 -// GFX12: global_atomic_pk_add_f16 v2, v[0:1], v2, off th:TH_ATOMIC_RETURN +// GFX12: global_atomic_pk_add_f16 v2, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off th:TH_ATOMIC_RETURN void test_global_add_half2(__global half2 *addr, half2 x) { half2 *rtn; *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x); @@ -93,7 +93,7 @@ void test_global_add_half2_noret(__global half2 *addr, half2 x) { // GFX12-LABEL: test_global_add_2bf16 -// GFX12: global_atomic_pk_add_bf16 v2, v[0:1], v2, off th:TH_ATOMIC_RETURN +// GFX12: global_atomic_pk_add_bf16 v2, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off th:TH_ATOMIC_RETURN void test_global_add_2bf16(__global short2 *addr, short2 x) { short2 *rtn; *rtn = __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x); diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl index 2f00977ec6014..2728490d5c02e 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx8.cl @@ -8,7 +8,7 @@ // CHECK-LABEL: test_fadd_local // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4 // GFX8-LABEL: test_fadd_local$local: -// GFX8: ds_add_rtn_f32 v2, v0, v1 +// GFX8: ds_add_rtn_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} // GFX8: s_endpgm kernel void test_fadd_local(__local float *ptr, float val){ float *res; diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl index 556e553903d1a..ef97d12afab1d 100644 --- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl +++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl @@ -20,7 +20,7 @@ void test_global_add_f64(__global double *addr, double x) { // CHECK-LABEL: test_global_add_half2 // CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} // GFX90A-LABEL: test_global_add_half2 -// GFX90A: global_atomic_pk_add_f16 v2, v[0:1], v2, off glc +// GFX90A: global_atomic_pk_add_f16 v2, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc void test_global_add_half2(__global half2 *addr, half2 x) { half2 *rtn; *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x); diff --git a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl index 625f0660482a9..b1e45e6d6e6dc 100644 --- a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl +++ b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl @@ -18,7 +18,7 @@ kernel void test(int i) { // SPIR64: %block_sizes = alloca [1 x i64] // COMMON-LABEL: if.then: // COMMON-NOT: alloca -// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg ![[TEMPLOCATION:[0-9]+]] +// CHECK-DEBUG: getelementptr {{.*}} %block_sizes{{.*}}, {{.*}} !dbg ![[TEMPLOCATION:[0-9]+]] // COMMON-LABEL: if.end queue_t default_queue; unsigned flags = 0; diff --git a/clang/test/CodeGenOpenCL/opencl_types.cl b/clang/test/CodeGenOpenCL/opencl_types.cl index 5b1c2afd4f1e3..7cab853c76d9a 100644 --- a/clang/test/CodeGenOpenCL/opencl_types.cl +++ b/clang/test/CodeGenOpenCL/opencl_types.cl @@ -42,7 +42,7 @@ kernel void foo(image1d_t img) { // CHECK-AMDGCN: alloca ptr addrspace(4) event_t evt; // CHECK-SPIR: alloca target("spirv.Event") - // CHECK-AMDGCN: alloca ptr addrspace(5) + // CHECK-AMDGCN: alloca ptr clk_event_t clk_evt; // CHECK-SPIR: alloca target("spirv.DeviceEvent") // CHECK-AMDGCN: alloca ptr addrspace(1) diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl index 08b936f1a9b07..a48857baef1a6 100644 --- a/clang/test/Index/pipe-size.cl +++ b/clang/test/Index/pipe-size.cl @@ -11,6 +11,6 @@ __kernel void testPipe( pipe int test ) // SPIR: store i32 4, ptr %s, align 4 // SPIR64: store target("spirv.Pipe", 0) %test, ptr %test.addr, align 8 // SPIR64: store i32 8, ptr %s, align 4 - // AMDGCN: store ptr addrspace(1) %test, ptr addrspace(5) %test.addr, align 8 - // AMDGCN: store i32 8, ptr addrspace(5) %s, align 4 + // AMDGCN: store ptr addrspace(1) %test, ptr %test{{.*}}, align 8 + // AMDGCN: store i32 8, ptr %s{{.*}}, align 4 } From a742e92de136e5c648200a87449bf6e95318e6c5 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 15 Oct 2024 22:59:26 +0100 Subject: [PATCH 2/7] Fix formatting. --- clang/lib/CodeGen/CGBlocks.cpp | 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index c3a266285011f..41bb8d19d161e 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1398,7 +1398,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, DI->EmitDeclareOfBlockLiteralArgVariable( *BlockInfo, D->getName(), argNum, cast(alloc.getPointer()->stripPointerCasts()), - Builder); + Builder); } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1b01484a46cf7..fb9386aba9567 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5859,7 +5859,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *TmpPtr = Tmp.getPointer(); llvm::Value *TmpSize = EmitLifetimeStart( CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), - TmpPtr->stripPointerCasts()); + TmpPtr->stripPointerCasts()); llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. From 7fcb506c120f21bc359c1ff797dc169453d0ef0a Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 16 Oct 2024 14:35:56 +0100 Subject: [PATCH 3/7] Strip less. --- clang/lib/CodeGen/CGBuiltin.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index dfcba05d5f146..9328f9cd8a30e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5857,9 +5857,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, /*IndexTypeQuals=*/0); auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); + // The EmitLifetime* pair expect a naked Alloca as their last argument, + // however for cases where the default AS is not the Alloca AS, Tmp is + // actually the Alloca ascasted to the default AS, hence the + // stripPointerCasts() + llvm::Value *Alloca = TmpPtr->stripPointerCasts(); llvm::Value *TmpSize = EmitLifetimeStart( - CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), - TmpPtr->stripPointerCasts()); + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca); llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -5875,7 +5879,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateAlignedStore( V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); } - return std::tie(ElemPtr, TmpSize, TmpPtr); + // Return the Alloca itself rather than a potential ascast as this is only + // used by the paired EmitLifetimeEnd. + return std::tie(ElemPtr, TmpSize, Alloca); }; // Could have events and/or varargs. @@ -5904,7 +5910,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto Call = RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); if (TmpSize) - EmitLifetimeEnd(TmpSize, TmpPtr->stripPointerCasts()); + EmitLifetimeEnd(TmpSize, TmpPtr); return Call; } // Any calls now have event arguments passed. From c4e603db833b5de6da2addc84eed9686db4eff86 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 16 Oct 2024 19:09:24 +0100 Subject: [PATCH 4/7] Fix borked merge. --- .../CodeGenOpenCL/addr-space-struct-arg.cl | 1394 ----------------- 1 file changed, 1394 deletions(-) diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index a40d2a4db2717..7377b5bcbc347 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -624,8 +624,6 @@ kernel void test_indirect_arg_local(void) { FuncOneLargeMember(l_s); } -<<<<<<< HEAD -======= // // X86-LABEL: define void @test_indirect_arg_private( // X86-SAME: ) #[[ATTR0]] { @@ -669,14 +667,11 @@ kernel void test_indirect_arg_local(void) { // AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b void test_indirect_arg_private(void) { struct LargeStructOneMember p_s; FuncOneLargeMember(p_s); } -<<<<<<< HEAD -======= // // X86-LABEL: define spir_kernel void @KernelOneMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { @@ -734,13 +729,10 @@ void test_indirect_arg_private(void) { // AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b kernel void KernelOneMember(struct StructOneMember u) { FuncOneMember(u); } -<<<<<<< HEAD -======= // // X86-LABEL: define spir_kernel void @KernelOneMemberSpir( // X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { @@ -808,13 +800,10 @@ kernel void KernelOneMember(struct StructOneMember u) { // AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b kernel void KernelOneMemberSpir(global struct StructOneMember* u) { FuncOneMember(*u); } -<<<<<<< HEAD -======= // // X86-LABEL: define spir_kernel void @KernelLargeOneMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { @@ -868,13 +857,10 @@ kernel void KernelOneMemberSpir(global struct StructOneMember* u) { // AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b kernel void KernelLargeOneMember(struct LargeStructOneMember u) { FuncOneLargeMember(u); } -<<<<<<< HEAD -======= // // X86-LABEL: define void @FuncTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { @@ -958,13 +944,10 @@ kernel void KernelLargeOneMember(struct LargeStructOneMember u) { // AMDGCN30-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b void FuncTwoMember(struct StructTwoMember u) { u.y = (int2)(0, 0); } -<<<<<<< HEAD -======= // // X86-LABEL: define void @FuncLargeTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { @@ -1042,13 +1025,10 @@ void FuncTwoMember(struct StructTwoMember u) { // AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b void FuncLargeTwoMember(struct LargeStructTwoMember u) { u.y[0] = (int2)(0, 0); } -<<<<<<< HEAD -======= // // X86-LABEL: define spir_kernel void @KernelTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { @@ -1130,13 +1110,10 @@ void FuncLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b kernel void KernelTwoMember(struct StructTwoMember u) { FuncTwoMember(u); } -<<<<<<< HEAD -======= // // X86-LABEL: define spir_kernel void @KernelLargeTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { @@ -1202,1331 +1179,10 @@ kernel void KernelTwoMember(struct StructTwoMember u) { // AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] // AMDGCN30-NEXT: ret void // ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { FuncLargeTwoMember(u); } //. -<<<<<<< HEAD -// X86: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 -//. -// AMDGCN: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 -// AMDGCN: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 -//. -// AMDGCN20: @g_s = addrspace(1) global %struct.LargeStructOneMember zeroinitializer, align 8 -// AMDGCN20: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 -// AMDGCN20: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 -//. -// SPIR: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 -//. -// AMDGCN30: @g_s = addrspace(1) global %struct.LargeStructOneMember zeroinitializer, align 8 -// AMDGCN30: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 -// AMDGCN30: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 -//. -// AMDGCN30-GVAR: @test_indirect_arg_local.l_s = internal addrspace(3) global %struct.LargeStructOneMember undef, align 8 -// AMDGCN30-GVAR: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 -//. -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define void @foo( -// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 -// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @ker( -// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 -// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 -// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 -// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 -// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 -// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 -// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 -// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) -// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] -// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define void @foo_large( -// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 -// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @ker_large( -// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 -// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 -// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 -// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 -// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 -// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 -// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 -// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) -// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define void @FuncOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8 -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define void @FuncOneLargeMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @test_indirect_arg_local( -// X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4 -// X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define void @test_indirect_arg_private( -// X86-SAME: ) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @KernelOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @KernelOneMemberSpir( -// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4 -// X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 -// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 -// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) -// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @KernelLargeOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define void @FuncTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8 -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define void @FuncLargeTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @KernelTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// -// X86: Function Attrs: convergent noinline norecurse nounwind optnone -// X86-LABEL: define spir_kernel void @KernelLargeTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 -// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local void @foo_large( -// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large( -// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local void @FuncOneMember( -// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( -// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( -// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private( -// AMDGCN-SAME: ) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( -// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( -// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local void @FuncTwoMember( -// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( -// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// -// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0 -// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 -// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr -// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4 -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false) -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local void @foo_large( -// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large( -// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr -// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local void @FuncOneMember( -// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember( -// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl( -// AMDGCN20-SAME: ) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( -// AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private( -// AMDGCN20-SAME: ) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( -// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 -// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( -// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr -// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local void @FuncTwoMember( -// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8 -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember( -// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 -// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -// AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// AMDGCN20: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false) -// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_func void @foo( -// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @ker( -// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 -// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 -// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) -// SPIR-NEXT: call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] -// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_func void @foo_large( -// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @ker_large( -// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 -// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 -// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) -// SPIR-NEXT: call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_func void @FuncOneMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local( -// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 -// SPIR-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) -// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private( -// SPIR-SAME: ) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 -// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir( -// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8 -// SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 -// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 -// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) -// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_func void @FuncTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[Y]], align 8 -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// -// SPIR: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN30-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 -// AMDGCN30-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN30-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN30-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local void @foo_large( -// AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker_large( -// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN30-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local void @FuncOneMember( -// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local void @FuncOneLargeMember( -// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_globl( -// AMDGCN30-SAME: ) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) -// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( -// AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_private( -// AMDGCN30-SAME: ) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( -// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN30-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( -// AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local void @FuncTwoMember( -// AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 -// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local void @FuncLargeTwoMember( -// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 -// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 -// AMDGCN30-GVAR-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN30-GVAR-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN30-GVAR-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local void @foo_large( -// AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large( -// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN30-GVAR-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember( -// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember( -// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( -// AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private( -// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( -// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( -// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember( -// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember( -// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -// -// AMDGCN30-GVAR: Function Attrs: convergent noinline norecurse nounwind optnone -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -//. -// X86: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } -// X86: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" "uniform-work-group-size"="true" } -// X86: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// X86: attributes #[[ATTR3]] = { convergent nounwind } -//. -// AMDGCN: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// AMDGCN: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// AMDGCN: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// AMDGCN: attributes #[[ATTR3]] = { convergent nounwind } -//. -// AMDGCN20: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// AMDGCN20: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// AMDGCN20: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// AMDGCN20: attributes #[[ATTR3]] = { convergent nounwind } -//. -// SPIR: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// SPIR: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// SPIR: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// SPIR: attributes #[[ATTR3]] = { convergent nounwind } -//. -// AMDGCN30: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// AMDGCN30: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// AMDGCN30: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// AMDGCN30: attributes #[[ATTR3]] = { convergent nounwind } -//. -// AMDGCN30-GVAR: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// AMDGCN30-GVAR: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// AMDGCN30-GVAR: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// AMDGCN30-GVAR: attributes #[[ATTR3]] = { convergent nounwind } -//. -// X86: [[META0:![0-9]+]] = !{i32 1, !"NumRegisterParameters", i32 0} -// X86: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// X86: [[META2:![0-9]+]] = !{i32 1, i32 2} -// X86: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -======= ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b // X86: [[META4]] = !{i32 1, i32 1} // X86: [[META5]] = !{!"none", !"none"} // X86: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} @@ -2543,13 +1199,6 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // X86: [[META17]] = !{!"struct StructTwoMember"} // X86: [[META18]] = !{!"struct LargeStructTwoMember"} //. -<<<<<<< HEAD -// AMDGCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -// AMDGCN: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// AMDGCN: [[META2:![0-9]+]] = !{i32 1, i32 2} -// AMDGCN: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -======= ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b // AMDGCN: [[META4]] = !{i32 1, i32 1} // AMDGCN: [[META5]] = !{!"none", !"none"} // AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} @@ -2566,13 +1215,6 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN: [[META17]] = !{!"struct StructTwoMember"} // AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"} //. -<<<<<<< HEAD -// AMDGCN20: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -// AMDGCN20: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// AMDGCN20: [[META2:![0-9]+]] = !{i32 2, i32 0} -// AMDGCN20: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -======= ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b // AMDGCN20: [[META4]] = !{i32 1, i32 1} // AMDGCN20: [[META5]] = !{!"none", !"none"} // AMDGCN20: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} @@ -2589,12 +1231,6 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN20: [[META17]] = !{!"struct StructTwoMember"} // AMDGCN20: [[META18]] = !{!"struct LargeStructTwoMember"} //. -<<<<<<< HEAD -// SPIR: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// SPIR: [[META1:![0-9]+]] = !{i32 1, i32 2} -// SPIR: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -======= ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b // SPIR: [[META3]] = !{i32 1, i32 1} // SPIR: [[META4]] = !{!"none", !"none"} // SPIR: [[META5]] = !{!"Mat3X3*", !"Mat4X4*"} @@ -2611,33 +1247,6 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // SPIR: [[META16]] = !{!"struct StructTwoMember"} // SPIR: [[META17]] = !{!"struct LargeStructTwoMember"} //. -<<<<<<< HEAD -// AMDGCN30: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -// AMDGCN30: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// AMDGCN30: [[META2:![0-9]+]] = !{i32 3, i32 0} -// AMDGCN30: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -// AMDGCN30: [[META4]] = !{i32 1, i32 1} -// AMDGCN30: [[META5]] = !{!"none", !"none"} -// AMDGCN30: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} -// AMDGCN30: [[META7]] = !{!"", !""} -// AMDGCN30: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"} -// AMDGCN30: [[META9]] = !{} -// AMDGCN30: [[META10]] = !{i32 0} -// AMDGCN30: [[META11]] = !{!"none"} -// AMDGCN30: [[META12]] = !{!"struct StructOneMember"} -// AMDGCN30: [[META13]] = !{!""} -// AMDGCN30: [[META14]] = !{i32 1} -// AMDGCN30: [[META15]] = !{!"struct StructOneMember*"} -// AMDGCN30: [[META16]] = !{!"struct LargeStructOneMember"} -// AMDGCN30: [[META17]] = !{!"struct StructTwoMember"} -// AMDGCN30: [[META18]] = !{!"struct LargeStructTwoMember"} -//. -// AMDGCN30-GVAR: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -// AMDGCN30-GVAR: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// AMDGCN30-GVAR: [[META2:![0-9]+]] = !{i32 3, i32 0} -// AMDGCN30-GVAR: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -======= ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b // AMDGCN30-GVAR: [[META4]] = !{i32 1, i32 1} // AMDGCN30-GVAR: [[META5]] = !{!"none", !"none"} // AMDGCN30-GVAR: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} @@ -2654,8 +1263,6 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN30-GVAR: [[META17]] = !{!"struct StructTwoMember"} // AMDGCN30-GVAR: [[META18]] = !{!"struct LargeStructTwoMember"} //. -<<<<<<< HEAD -======= // AMDGCN30: [[META4]] = !{i32 1, i32 1} // AMDGCN30: [[META5]] = !{!"none", !"none"} // AMDGCN30: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"} @@ -2672,4 +1279,3 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN30: [[META17]] = !{!"struct StructTwoMember"} // AMDGCN30: [[META18]] = !{!"struct LargeStructTwoMember"} //. ->>>>>>> fc362521a3a5d67e3059ca02b504d87c32ede02b From e7fdefd2c93128f728638ba136bfa8c5f3445de8 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 16 Oct 2024 19:11:02 +0100 Subject: [PATCH 5/7] Update tests. --- .../CodeGenOpenCL/addr-space-struct-arg.cl | 169 ++++++++++-------- .../amdgcn-automatic-variable.cl | 35 ++-- 2 files changed, 119 insertions(+), 85 deletions(-) diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 7377b5bcbc347..57d056b0ff9d5 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -69,9 +69,11 @@ struct LargeStructOneMember g_s; // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0 +// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 // AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] // // SPIR-LABEL: define dso_local spir_func void @foo( @@ -150,19 +152,22 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 // AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4 +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false) // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_kernel void @ker( @@ -245,10 +250,11 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { // AMDGCN-NEXT: ret void // // AMDGCN20-LABEL: define dso_local void @foo_large( -// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_func void @foo_large( @@ -319,15 +325,18 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_kernel void @ker_large( @@ -419,12 +428,14 @@ kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_func void @FuncOneMember( @@ -497,14 +508,16 @@ void FuncOneMember(struct StructOneMember u) { // AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember( // AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember( @@ -643,7 +656,10 @@ kernel void test_indirect_arg_local(void) { // AMDGCN20-SAME: ) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private( @@ -694,10 +710,11 @@ void test_indirect_arg_private(void) { // AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 // AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] // AMDGCN20-NEXT: ret void // @@ -760,8 +777,9 @@ kernel void KernelOneMember(struct StructOneMember u) { // AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 // AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] @@ -825,10 +843,13 @@ kernel void KernelOneMemberSpir(global struct StructOneMember* u) { // AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember( @@ -894,14 +915,16 @@ kernel void KernelLargeOneMember(struct LargeStructOneMember u) { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8 // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_func void @FuncTwoMember( @@ -978,14 +1001,16 @@ void FuncTwoMember(struct StructTwoMember u) { // AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember( // AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember( @@ -1057,16 +1082,17 @@ void FuncLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 // AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 // AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] // AMDGCN20-NEXT: ret void // @@ -1138,13 +1164,16 @@ kernel void KernelTwoMember(struct StructTwoMember u) { // AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 // AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false) +// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember( diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl index 0ee91858ad9af..dba6519966eb5 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl @@ -15,8 +15,9 @@ // CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { // CL20-NEXT: [[ENTRY:.*:]] // CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CL20-NEXT: store ptr [[X]], ptr addrspace(5) [[X_ADDR]], align 8 -// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[X_ADDR]], align 8 +// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 // CL20-NEXT: store i32 1, ptr [[TMP0]], align 4 // CL20-NEXT: ret void // @@ -54,19 +55,22 @@ void func1(int *x) { // CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5) // CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5) // CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) -// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4 -// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4 -// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 -// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4 // CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr -// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8 -// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 -// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr -// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8 -// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr -// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]] -// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4 -// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4 +// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr +// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr +// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr +// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr +// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr +// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4 +// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4 +// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 +// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4 +// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8 +// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 +// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8 +// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]] +// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4 +// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4 // CL20-NEXT: ret void // void func2(void) { @@ -98,7 +102,8 @@ void func2(void) { // CL20-SAME: ) #[[ATTR0]] { // CL20-NEXT: [[ENTRY:.*:]] // CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5) -// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false) +// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false) // CL20-NEXT: ret void // void func3(void) { From 988a1b889ba088697b28489608f4b28c63e2a941 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 22 Oct 2024 00:18:30 +0100 Subject: [PATCH 6/7] Fix mangled test. --- clang/test/CodeGenOpenCL/atomic-ops.cl | 1021 ++++++------------------ 1 file changed, 233 insertions(+), 788 deletions(-) diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl index 1e46747df3536..6c0cb3487fb18 100644 --- a/clang/test/CodeGenOpenCL/atomic-ops.cl +++ b/clang/test/CodeGenOpenCL/atomic-ops.cl @@ -1,4 +1,3 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \ // RUN: | FileCheck %s @@ -36,877 +35,323 @@ typedef enum memory_scope { atomic_int j; -// CHECK-LABEL: define dso_local void @fi1( -// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP1]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr -// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP3]] syncscope("agent") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ATOMIC_TEMP1_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP5]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP8]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP9]] syncscope("wavefront") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP10]], ptr [[ATOMIC_TEMP3_ASCAST]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ATOMIC_TEMP3_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP11]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: ret void -// void fi1(atomic_int *i) { + // CHECK-LABEL: @fi1 + // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4{{$}} int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); + // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4{{$}} x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); + // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4{{$}} x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); } -// CHECK-LABEL: define dso_local void @fi2( -// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: ret void -// void fi2(atomic_int *i) { + // CHECK-LABEL: @fi2 + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4{{$}} __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); } -// CHECK-LABEL: define dso_local void @test_addr( -// CHECK-SAME: ptr addrspace(1) noundef [[IG:%.*]], ptr addrspace(5) noundef [[IP:%.*]], ptr addrspace(3) noundef [[IL:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[IG_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// CHECK-NEXT: [[IP_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// CHECK-NEXT: [[IL_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[IG_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IG_ADDR]] to ptr -// CHECK-NEXT: [[IP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IP_ADDR]] to ptr -// CHECK-NEXT: [[IL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IL_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP2]] to ptr -// CHECK-NEXT: store ptr addrspace(1) [[IG]], ptr [[IG_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr addrspace(5) [[IP]], ptr [[IP_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store ptr addrspace(3) [[IL]], ptr [[IL_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IG_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[TMP0]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(5), ptr [[IP_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 -// CHECK-NEXT: store atomic i32 [[TMP3]], ptr addrspace(5) [[TMP2]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[IL_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP2_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTATOMICTMP2_ASCAST]], align 4 -// CHECK-NEXT: store atomic i32 [[TMP5]], ptr addrspace(3) [[TMP4]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: ret void -// void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) { + // CHECK-LABEL: @test_addr + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(1) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4{{$}} __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(5) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4{{$}} __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group); + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, ptr addrspace(3) %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4{{$}} __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group); } -// CHECK-LABEL: define dso_local void @fi3( -// CHECK-SAME: ptr noundef [[I:%.*]], ptr noundef [[UI:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[UI_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// CHECK-NEXT: [[UI_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[UI_ADDR]] to ptr -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr -// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[UI]], ptr [[UI_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw min ptr [[TMP4]], i32 [[TMP5]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP7]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = atomicrmw max ptr [[TMP8]], i32 [[TMP9]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP11]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[UI_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 -// CHECK-NEXT: [[TMP14:%.*]] = atomicrmw umin ptr [[TMP12]], i32 [[TMP13]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP15]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[UI_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = atomicrmw umax ptr [[TMP16]], i32 [[TMP17]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP18]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP19]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: ret void -// void fi3(atomic_int *i, atomic_uint *ui) { + // CHECK-LABEL: @fi3 + // CHECK: atomicrmw and ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE:![0-9]+]]{{$}} int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); + // CHECK: atomicrmw min ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); + // CHECK: atomicrmw max ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); + // CHECK: atomicrmw umin ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group); + // CHECK: atomicrmw umax ptr %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group); } -// CHECK-LABEL: define dso_local zeroext i1 @fi4( -// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// CHECK-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr -// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup-one-as") acquire acquire, align 4 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 -// CHECK-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] -// CHECK: [[CMPXCHG_STORE_EXPECTED]]: -// CHECK-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] -// CHECK: [[CMPXCHG_CONTINUE]]: -// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 -// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK-NEXT: ret i1 [[LOADEDV]] -// bool fi4(atomic_int *i) { + // CHECK-LABEL: @fi4( + // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg ptr [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 + // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 + // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] + // CHECK: store i32 [[OLD]] int cmp = 0; return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group); } -// CHECK-LABEL: define dso_local void @fi5( -// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// CHECK-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4 -// CHECK-NEXT: switch i32 [[TMP1]], label %[[OPENCL_ALLSVMDEVICES:.*]] [ -// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]] -// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]] -// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]] -// CHECK-NEXT: ] -// CHECK: [[OPENCL_WORKGROUP]]: -// CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]] -// CHECK: [[OPENCL_DEVICE]]: -// CHECK-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// CHECK: [[OPENCL_ALLSVMDEVICES]]: -// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// CHECK: [[OPENCL_SUBGROUP]]: -// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// CHECK: [[ATOMIC_SCOPE_CONTINUE]]: -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: ret void -// void fi5(atomic_int *i, int scope) { + // CHECK-LABEL: @fi5 + // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [ + // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]] + // CHECK-NEXT: i32 2, label %[[opencl_device:.*]] + // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]] + // CHECK-NEXT: ] + // CHECK: [[opencl_workgroup]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4{{$}} + // CHECK: br label %[[continue:.*]] + // CHECK: [[opencl_device]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4{{$}} + // CHECK: br label %[[continue]] + // CHECK: [[opencl_allsvmdevices]]: + // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4 + // CHECK: br label %[[continue]] + // CHECK: [[opencl_subgroup]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4{{$}} + // CHECK: br label %[[continue]] + // CHECK: [[continue]]: int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); } -// CHECK-LABEL: define dso_local void @fi6( -// CHECK-SAME: ptr noundef [[I:%.*]], i32 noundef [[ORDER:%.*]], i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[ORDER_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// CHECK-NEXT: [[ORDER_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORDER_ADDR]] to ptr -// CHECK-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[ORDER]], ptr [[ORDER_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ORDER_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4 -// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [ -// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]] -// CHECK-NEXT: i32 2, label %[[ACQUIRE]] -// CHECK-NEXT: i32 5, label %[[SEQCST:.*]] -// CHECK-NEXT: ] -// CHECK: [[MONOTONIC]]: -// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES:.*]] [ -// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP:.*]] -// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE:.*]] -// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP:.*]] -// CHECK-NEXT: ] -// CHECK: [[ACQUIRE]]: -// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES3:.*]] [ -// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP1:.*]] -// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE2:.*]] -// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP4:.*]] -// CHECK-NEXT: ] -// CHECK: [[SEQCST]]: -// CHECK-NEXT: switch i32 [[TMP2]], label %[[OPENCL_ALLSVMDEVICES8:.*]] [ -// CHECK-NEXT: i32 1, label %[[OPENCL_WORKGROUP6:.*]] -// CHECK-NEXT: i32 2, label %[[OPENCL_DEVICE7:.*]] -// CHECK-NEXT: i32 4, label %[[OPENCL_SUBGROUP9:.*]] -// CHECK-NEXT: ] -// CHECK: [[ATOMIC_CONTINUE:.*]]: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[X_ASCAST]], align 4 -// CHECK-NEXT: ret void -// CHECK: [[OPENCL_WORKGROUP]]: -// CHECK-NEXT: [[TMP4:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK-NEXT: store i32 [[TMP4]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE:.*]] -// CHECK: [[OPENCL_DEVICE]]: -// CHECK-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") monotonic, align 4 -// CHECK-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// CHECK: [[OPENCL_ALLSVMDEVICES]]: -// CHECK-NEXT: [[TMP6:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") monotonic, align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// CHECK: [[OPENCL_SUBGROUP]]: -// CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]] -// CHECK: [[ATOMIC_SCOPE_CONTINUE]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// CHECK: [[OPENCL_WORKGROUP1]]: -// CHECK-NEXT: [[TMP8:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup-one-as") acquire, align 4 -// CHECK-NEXT: store i32 [[TMP8]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5:.*]] -// CHECK: [[OPENCL_DEVICE2]]: -// CHECK-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent-one-as") acquire, align 4 -// CHECK-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]] -// CHECK: [[OPENCL_ALLSVMDEVICES3]]: -// CHECK-NEXT: [[TMP10:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("one-as") acquire, align 4 -// CHECK-NEXT: store i32 [[TMP10]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]] -// CHECK: [[OPENCL_SUBGROUP4]]: -// CHECK-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront-one-as") acquire, align 4 -// CHECK-NEXT: store i32 [[TMP11]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE5]] -// CHECK: [[ATOMIC_SCOPE_CONTINUE5]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// CHECK: [[OPENCL_WORKGROUP6]]: -// CHECK-NEXT: [[TMP12:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10:.*]] -// CHECK: [[OPENCL_DEVICE7]]: -// CHECK-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("agent") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP13]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]] -// CHECK: [[OPENCL_ALLSVMDEVICES8]]: -// CHECK-NEXT: [[TMP14:%.*]] = load atomic i32, ptr [[TMP0]] seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP14]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]] -// CHECK: [[OPENCL_SUBGROUP9]]: -// CHECK-NEXT: [[TMP15:%.*]] = load atomic i32, ptr [[TMP0]] syncscope("wavefront") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP15]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE10]] -// CHECK: [[ATOMIC_SCOPE_CONTINUE10]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// void fi6(atomic_int *i, int order, int scope) { + // CHECK-LABEL: @fi6 + // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [ + // CHECK-NEXT: i32 1, label %[[acquire:.*]] + // CHECK-NEXT: i32 2, label %[[acquire:.*]] + // CHECK-NEXT: i32 5, label %[[seqcst:.*]] + // CHECK-NEXT: ] + // CHECK: [[monotonic]]: + // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[MON_WG:.*]] + // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: [[acquire]]: + // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]] + // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: [[seqcst]]: + // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]] + // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: [[MON_WG]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}} + // CHECK: [[MON_DEV]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") monotonic, align 4{{$}} + // CHECK: [[MON_ALL]]: + // CHECK: load atomic i32, ptr %{{.*}} monotonic, align 4{{$}} + // CHECK: [[MON_SUB]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") monotonic, align 4{{$}} + // CHECK: [[ACQ_WG]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup-one-as") acquire, align 4{{$}} + // CHECK: [[ACQ_DEV]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent-one-as") acquire, align 4{{$}} + // CHECK: [[ACQ_ALL]]: + // CHECK: load atomic i32, ptr %{{.*}} acquire, align 4{{$}} + // CHECK: [[ACQ_SUB]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront-one-as") acquire, align 4{{$}} + // CHECK: [[SEQ_WG]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("workgroup") seq_cst, align 4{{$}} + // CHECK: [[SEQ_DEV]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("agent") seq_cst, align 4{{$}} + // CHECK: [[SEQ_ALL]]: + // CHECK: load atomic i32, ptr %{{.*}} seq_cst, align 4{{$}} + // CHECK: [[SEQ_SUB]]: + // CHECK: load atomic i32, ptr %{{.*}} syncscope("wavefront") seq_cst, align 4{{$}} int x = __opencl_atomic_load(i, order, scope); } -// CHECK-LABEL: define dso_local float @ff1( -// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[TMP0]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: ret float [[TMP2]] -// float ff1(global atomic_float *d) { + // CHECK-LABEL: @ff1 + // CHECK: load atomic i32, ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}} return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group); } -// CHECK-LABEL: define dso_local void @ff2( -// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] syncscope("workgroup-one-as") release, align 4 -// CHECK-NEXT: ret void -// void ff2(atomic_float *d) { + // CHECK-LABEL: @ff2 + // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4 __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group); } -// CHECK-LABEL: define dso_local float @ff3( -// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: ret float [[TMP3]] -// float ff3(atomic_float *d) { + // CHECK-LABEL: @ff3 + // CHECK: atomicrmw xchg ptr {{.*}} syncscope("workgroup") seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); } -// CHECK-LABEL: define dso_local float @ff4( -// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], float noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store float [[A]], ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store float [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], float [[TMP2]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK-NEXT: store float [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: ret float [[TMP4]] -// float ff4(global atomic_float *d, float a) { + // CHECK-LABEL: @ff4 + // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}} return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); } -// CHECK-LABEL: define dso_local float @ff5( -// CHECK-SAME: ptr addrspace(1) noundef [[D:%.*]], double noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca double, align 8, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: store ptr addrspace(1) [[D]], ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store double [[A]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[D_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store double [[TMP1]], ptr [[DOTATOMICTMP_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTATOMICTMP_ASCAST]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], double [[TMP2]] syncscope("workgroup-one-as") monotonic, align 8 -// CHECK-NEXT: store double [[TMP3]], ptr [[ATOMIC_TEMP_ASCAST]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ATOMIC_TEMP_ASCAST]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP4]] to float -// CHECK-NEXT: ret float [[CONV]] -// float ff5(global atomic_double *d, double a) { + // CHECK-LABEL: @ff5 + // CHECK: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}} return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); } -// CHECK-LABEL: define dso_local void @atomic_init_foo( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store i32 42, ptr addrspace(1) @j, align 4 -// CHECK-NEXT: ret void -// +float ff4_generic(atomic_float *d, float a) { + // CHECK-LABEL: @ff4_generic + // CHECK: atomicrmw fadd ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); +} + +float ff5_generic(atomic_double *d, double a) { + // CHECK-LABEL: @ff5_generic + // CHECK: atomicrmw fadd ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace [[$NOPRIVATE]]{{$}} + return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); +} + +// CHECK-LABEL: @atomic_init_foo void atomic_init_foo() { + // CHECK-NOT: atomic + // CHECK: store __opencl_atomic_init(&j, 42); + // CHECK-NOT: atomic + // CHECK: } } -// CHECK-LABEL: define dso_local void @failureOrder( -// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[CMPXCHG_BOOL2:%.*]] = alloca i8, align 1, addrspace(5) -// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr -// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr -// CHECK-NEXT: [[CMPXCHG_BOOL2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL2]] to ptr -// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 43, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] syncscope("workgroup-one-as") acquire monotonic, align 4 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 -// CHECK-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] -// CHECK: [[CMPXCHG_STORE_EXPECTED]]: -// CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] -// CHECK: [[CMPXCHG_CONTINUE]]: -// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 -// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 43, ptr [[DOTATOMICTMP1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = cmpxchg weak ptr [[TMP8]], i32 [[TMP10]], i32 [[TMP11]] syncscope("workgroup") seq_cst acquire, align 4 -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 -// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 -// CHECK-NEXT: br i1 [[TMP14]], label %[[CMPXCHG_CONTINUE4:.*]], label %[[CMPXCHG_STORE_EXPECTED3:.*]] -// CHECK: [[CMPXCHG_STORE_EXPECTED3]]: -// CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP9]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE4]] -// CHECK: [[CMPXCHG_CONTINUE4]]: -// CHECK-NEXT: [[STOREDV5:%.*]] = zext i1 [[TMP14]] to i8 -// CHECK-NEXT: store i8 [[STOREDV5]], ptr [[CMPXCHG_BOOL2_ASCAST]], align 1 -// CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[CMPXCHG_BOOL2_ASCAST]], align 1 -// CHECK-NEXT: [[LOADEDV6:%.*]] = trunc i8 [[TMP15]] to i1 -// CHECK-NEXT: ret void -// +// CHECK-LABEL: @failureOrder void failureOrder(atomic_int *ptr, int *ptr2) { + // CHECK: cmpxchg ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); + // CHECK: cmpxchg weak ptr {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group); } -// CHECK-LABEL: define dso_local void @generalFailureOrder( -// CHECK-SAME: ptr noundef [[PTR:%.*]], ptr noundef [[PTR2:%.*]], i32 noundef [[SUCCESS:%.*]], i32 noundef [[FAIL:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[SUCCESS_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[FAIL_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) -// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr -// CHECK-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr -// CHECK-NEXT: [[SUCCESS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUCCESS_ADDR]] to ptr -// CHECK-NEXT: [[FAIL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FAIL_ADDR]] to ptr -// CHECK-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr -// CHECK-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr -// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[SUCCESS]], ptr [[SUCCESS_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store i32 [[FAIL]], ptr [[FAIL_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SUCCESS_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 42, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[FAIL_ADDR_ASCAST]], align 4 -// CHECK-NEXT: switch i32 [[TMP1]], label %[[MONOTONIC:.*]] [ -// CHECK-NEXT: i32 1, label %[[ACQUIRE:.*]] -// CHECK-NEXT: i32 2, label %[[ACQUIRE]] -// CHECK-NEXT: i32 3, label %[[RELEASE:.*]] -// CHECK-NEXT: i32 4, label %[[ACQREL:.*]] -// CHECK-NEXT: i32 5, label %[[SEQCST:.*]] -// CHECK-NEXT: ] -// CHECK: [[MONOTONIC]]: -// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL:.*]] [ -// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL:.*]] -// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL]] -// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL:.*]] -// CHECK-NEXT: ] -// CHECK: [[ACQUIRE]]: -// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL8:.*]] [ -// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL9:.*]] -// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL9]] -// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL10:.*]] -// CHECK-NEXT: ] -// CHECK: [[RELEASE]]: -// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL21:.*]] [ -// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL22:.*]] -// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL22]] -// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL23:.*]] -// CHECK-NEXT: ] -// CHECK: [[ACQREL]]: -// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL34:.*]] [ -// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL35:.*]] -// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL35]] -// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL36:.*]] -// CHECK-NEXT: ] -// CHECK: [[SEQCST]]: -// CHECK-NEXT: switch i32 [[TMP3]], label %[[MONOTONIC_FAIL47:.*]] [ -// CHECK-NEXT: i32 1, label %[[ACQUIRE_FAIL48:.*]] -// CHECK-NEXT: i32 2, label %[[ACQUIRE_FAIL48]] -// CHECK-NEXT: i32 5, label %[[SEQCST_FAIL49:.*]] -// CHECK-NEXT: ] -// CHECK: [[ATOMIC_CONTINUE:.*]]: -// CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK-NEXT: ret void -// CHECK: [[MONOTONIC_FAIL]]: -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP6]] syncscope("workgroup-one-as") monotonic monotonic, align 4 -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { i32, i1 } [[TMP7]], 0 -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -// CHECK-NEXT: br i1 [[TMP9]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] -// CHECK: [[ACQUIRE_FAIL]]: -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP10]], i32 [[TMP11]] syncscope("workgroup-one-as") monotonic acquire, align 4 -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 -// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 -// CHECK-NEXT: br i1 [[TMP14]], label %[[CMPXCHG_CONTINUE3:.*]], label %[[CMPXCHG_STORE_EXPECTED2:.*]] -// CHECK: [[SEQCST_FAIL]]: -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP15]], i32 [[TMP16]] syncscope("workgroup-one-as") monotonic seq_cst, align 4 -// CHECK-NEXT: [[TMP18:%.*]] = extractvalue { i32, i1 } [[TMP17]], 0 -// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i1 } [[TMP17]], 1 -// CHECK-NEXT: br i1 [[TMP19]], label %[[CMPXCHG_CONTINUE6:.*]], label %[[CMPXCHG_STORE_EXPECTED5:.*]] -// CHECK: [[ATOMIC_CONTINUE1:.*]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// CHECK: [[CMPXCHG_STORE_EXPECTED]]: -// CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]] -// CHECK: [[CMPXCHG_CONTINUE]]: -// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP9]] to i8 -// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]] -// CHECK: [[CMPXCHG_STORE_EXPECTED2]]: -// CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE3]] -// CHECK: [[CMPXCHG_CONTINUE3]]: -// CHECK-NEXT: [[STOREDV4:%.*]] = zext i1 [[TMP14]] to i8 -// CHECK-NEXT: store i8 [[STOREDV4]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]] -// CHECK: [[CMPXCHG_STORE_EXPECTED5]]: -// CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE6]] -// CHECK: [[CMPXCHG_CONTINUE6]]: -// CHECK-NEXT: [[STOREDV7:%.*]] = zext i1 [[TMP19]] to i8 -// CHECK-NEXT: store i8 [[STOREDV7]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE1]] -// CHECK: [[MONOTONIC_FAIL8]]: -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP20]], i32 [[TMP21]] syncscope("workgroup-one-as") acquire monotonic, align 4 -// CHECK-NEXT: [[TMP23:%.*]] = extractvalue { i32, i1 } [[TMP22]], 0 -// CHECK-NEXT: [[TMP24:%.*]] = extractvalue { i32, i1 } [[TMP22]], 1 -// CHECK-NEXT: br i1 [[TMP24]], label %[[CMPXCHG_CONTINUE13:.*]], label %[[CMPXCHG_STORE_EXPECTED12:.*]] -// CHECK: [[ACQUIRE_FAIL9]]: -// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP25]], i32 [[TMP26]] syncscope("workgroup-one-as") acquire acquire, align 4 -// CHECK-NEXT: [[TMP28:%.*]] = extractvalue { i32, i1 } [[TMP27]], 0 -// CHECK-NEXT: [[TMP29:%.*]] = extractvalue { i32, i1 } [[TMP27]], 1 -// CHECK-NEXT: br i1 [[TMP29]], label %[[CMPXCHG_CONTINUE16:.*]], label %[[CMPXCHG_STORE_EXPECTED15:.*]] -// CHECK: [[SEQCST_FAIL10]]: -// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP32:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP30]], i32 [[TMP31]] syncscope("workgroup-one-as") acquire seq_cst, align 4 -// CHECK-NEXT: [[TMP33:%.*]] = extractvalue { i32, i1 } [[TMP32]], 0 -// CHECK-NEXT: [[TMP34:%.*]] = extractvalue { i32, i1 } [[TMP32]], 1 -// CHECK-NEXT: br i1 [[TMP34]], label %[[CMPXCHG_CONTINUE19:.*]], label %[[CMPXCHG_STORE_EXPECTED18:.*]] -// CHECK: [[ATOMIC_CONTINUE11:.*]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// CHECK: [[CMPXCHG_STORE_EXPECTED12]]: -// CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE13]] -// CHECK: [[CMPXCHG_CONTINUE13]]: -// CHECK-NEXT: [[STOREDV14:%.*]] = zext i1 [[TMP24]] to i8 -// CHECK-NEXT: store i8 [[STOREDV14]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE11]] -// CHECK: [[CMPXCHG_STORE_EXPECTED15]]: -// CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE16]] -// CHECK: [[CMPXCHG_CONTINUE16]]: -// CHECK-NEXT: [[STOREDV17:%.*]] = zext i1 [[TMP29]] to i8 -// CHECK-NEXT: store i8 [[STOREDV17]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE11]] -// CHECK: [[CMPXCHG_STORE_EXPECTED18]]: -// CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE19]] -// CHECK: [[CMPXCHG_CONTINUE19]]: -// CHECK-NEXT: [[STOREDV20:%.*]] = zext i1 [[TMP34]] to i8 -// CHECK-NEXT: store i8 [[STOREDV20]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE11]] -// CHECK: [[MONOTONIC_FAIL21]]: -// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP37:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP35]], i32 [[TMP36]] syncscope("workgroup-one-as") release monotonic, align 4 -// CHECK-NEXT: [[TMP38:%.*]] = extractvalue { i32, i1 } [[TMP37]], 0 -// CHECK-NEXT: [[TMP39:%.*]] = extractvalue { i32, i1 } [[TMP37]], 1 -// CHECK-NEXT: br i1 [[TMP39]], label %[[CMPXCHG_CONTINUE26:.*]], label %[[CMPXCHG_STORE_EXPECTED25:.*]] -// CHECK: [[ACQUIRE_FAIL22]]: -// CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP40]], i32 [[TMP41]] syncscope("workgroup-one-as") release acquire, align 4 -// CHECK-NEXT: [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP42]], 0 -// CHECK-NEXT: [[TMP44:%.*]] = extractvalue { i32, i1 } [[TMP42]], 1 -// CHECK-NEXT: br i1 [[TMP44]], label %[[CMPXCHG_CONTINUE29:.*]], label %[[CMPXCHG_STORE_EXPECTED28:.*]] -// CHECK: [[SEQCST_FAIL23]]: -// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP47:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP45]], i32 [[TMP46]] syncscope("workgroup-one-as") release seq_cst, align 4 -// CHECK-NEXT: [[TMP48:%.*]] = extractvalue { i32, i1 } [[TMP47]], 0 -// CHECK-NEXT: [[TMP49:%.*]] = extractvalue { i32, i1 } [[TMP47]], 1 -// CHECK-NEXT: br i1 [[TMP49]], label %[[CMPXCHG_CONTINUE32:.*]], label %[[CMPXCHG_STORE_EXPECTED31:.*]] -// CHECK: [[ATOMIC_CONTINUE24:.*]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// CHECK: [[CMPXCHG_STORE_EXPECTED25]]: -// CHECK-NEXT: store i32 [[TMP38]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE26]] -// CHECK: [[CMPXCHG_CONTINUE26]]: -// CHECK-NEXT: [[STOREDV27:%.*]] = zext i1 [[TMP39]] to i8 -// CHECK-NEXT: store i8 [[STOREDV27]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE24]] -// CHECK: [[CMPXCHG_STORE_EXPECTED28]]: -// CHECK-NEXT: store i32 [[TMP43]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE29]] -// CHECK: [[CMPXCHG_CONTINUE29]]: -// CHECK-NEXT: [[STOREDV30:%.*]] = zext i1 [[TMP44]] to i8 -// CHECK-NEXT: store i8 [[STOREDV30]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE24]] -// CHECK: [[CMPXCHG_STORE_EXPECTED31]]: -// CHECK-NEXT: store i32 [[TMP48]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE32]] -// CHECK: [[CMPXCHG_CONTINUE32]]: -// CHECK-NEXT: [[STOREDV33:%.*]] = zext i1 [[TMP49]] to i8 -// CHECK-NEXT: store i8 [[STOREDV33]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE24]] -// CHECK: [[MONOTONIC_FAIL34]]: -// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP52:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP50]], i32 [[TMP51]] syncscope("workgroup-one-as") acq_rel monotonic, align 4 -// CHECK-NEXT: [[TMP53:%.*]] = extractvalue { i32, i1 } [[TMP52]], 0 -// CHECK-NEXT: [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1 -// CHECK-NEXT: br i1 [[TMP54]], label %[[CMPXCHG_CONTINUE39:.*]], label %[[CMPXCHG_STORE_EXPECTED38:.*]] -// CHECK: [[ACQUIRE_FAIL35]]: -// CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP57:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP55]], i32 [[TMP56]] syncscope("workgroup-one-as") acq_rel acquire, align 4 -// CHECK-NEXT: [[TMP58:%.*]] = extractvalue { i32, i1 } [[TMP57]], 0 -// CHECK-NEXT: [[TMP59:%.*]] = extractvalue { i32, i1 } [[TMP57]], 1 -// CHECK-NEXT: br i1 [[TMP59]], label %[[CMPXCHG_CONTINUE42:.*]], label %[[CMPXCHG_STORE_EXPECTED41:.*]] -// CHECK: [[SEQCST_FAIL36]]: -// CHECK-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP62:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP60]], i32 [[TMP61]] syncscope("workgroup-one-as") acq_rel seq_cst, align 4 -// CHECK-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP62]], 0 -// CHECK-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1 -// CHECK-NEXT: br i1 [[TMP64]], label %[[CMPXCHG_CONTINUE45:.*]], label %[[CMPXCHG_STORE_EXPECTED44:.*]] -// CHECK: [[ATOMIC_CONTINUE37:.*]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// CHECK: [[CMPXCHG_STORE_EXPECTED38]]: -// CHECK-NEXT: store i32 [[TMP53]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE39]] -// CHECK: [[CMPXCHG_CONTINUE39]]: -// CHECK-NEXT: [[STOREDV40:%.*]] = zext i1 [[TMP54]] to i8 -// CHECK-NEXT: store i8 [[STOREDV40]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE37]] -// CHECK: [[CMPXCHG_STORE_EXPECTED41]]: -// CHECK-NEXT: store i32 [[TMP58]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE42]] -// CHECK: [[CMPXCHG_CONTINUE42]]: -// CHECK-NEXT: [[STOREDV43:%.*]] = zext i1 [[TMP59]] to i8 -// CHECK-NEXT: store i8 [[STOREDV43]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE37]] -// CHECK: [[CMPXCHG_STORE_EXPECTED44]]: -// CHECK-NEXT: store i32 [[TMP63]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE45]] -// CHECK: [[CMPXCHG_CONTINUE45]]: -// CHECK-NEXT: [[STOREDV46:%.*]] = zext i1 [[TMP64]] to i8 -// CHECK-NEXT: store i8 [[STOREDV46]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE37]] -// CHECK: [[MONOTONIC_FAIL47]]: -// CHECK-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP67:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP65]], i32 [[TMP66]] syncscope("workgroup") seq_cst monotonic, align 4 -// CHECK-NEXT: [[TMP68:%.*]] = extractvalue { i32, i1 } [[TMP67]], 0 -// CHECK-NEXT: [[TMP69:%.*]] = extractvalue { i32, i1 } [[TMP67]], 1 -// CHECK-NEXT: br i1 [[TMP69]], label %[[CMPXCHG_CONTINUE52:.*]], label %[[CMPXCHG_STORE_EXPECTED51:.*]] -// CHECK: [[ACQUIRE_FAIL48]]: -// CHECK-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP72:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP70]], i32 [[TMP71]] syncscope("workgroup") seq_cst acquire, align 4 -// CHECK-NEXT: [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP72]], 0 -// CHECK-NEXT: [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1 -// CHECK-NEXT: br i1 [[TMP74]], label %[[CMPXCHG_CONTINUE55:.*]], label %[[CMPXCHG_STORE_EXPECTED54:.*]] -// CHECK: [[SEQCST_FAIL49]]: -// CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP77:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP75]], i32 [[TMP76]] syncscope("workgroup") seq_cst seq_cst, align 4 -// CHECK-NEXT: [[TMP78:%.*]] = extractvalue { i32, i1 } [[TMP77]], 0 -// CHECK-NEXT: [[TMP79:%.*]] = extractvalue { i32, i1 } [[TMP77]], 1 -// CHECK-NEXT: br i1 [[TMP79]], label %[[CMPXCHG_CONTINUE58:.*]], label %[[CMPXCHG_STORE_EXPECTED57:.*]] -// CHECK: [[ATOMIC_CONTINUE50:.*]]: -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]] -// CHECK: [[CMPXCHG_STORE_EXPECTED51]]: -// CHECK-NEXT: store i32 [[TMP68]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE52]] -// CHECK: [[CMPXCHG_CONTINUE52]]: -// CHECK-NEXT: [[STOREDV53:%.*]] = zext i1 [[TMP69]] to i8 -// CHECK-NEXT: store i8 [[STOREDV53]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE50]] -// CHECK: [[CMPXCHG_STORE_EXPECTED54]]: -// CHECK-NEXT: store i32 [[TMP73]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE55]] -// CHECK: [[CMPXCHG_CONTINUE55]]: -// CHECK-NEXT: [[STOREDV56:%.*]] = zext i1 [[TMP74]] to i8 -// CHECK-NEXT: store i8 [[STOREDV56]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE50]] -// CHECK: [[CMPXCHG_STORE_EXPECTED57]]: -// CHECK-NEXT: store i32 [[TMP78]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE58]] -// CHECK: [[CMPXCHG_CONTINUE58]]: -// CHECK-NEXT: [[STOREDV59:%.*]] = zext i1 [[TMP79]] to i8 -// CHECK-NEXT: store i8 [[STOREDV59]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 -// CHECK-NEXT: br label %[[ATOMIC_CONTINUE50]] -// +// CHECK-LABEL: @generalFailureOrder void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) { __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group); +// CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[ACQUIRE]] + // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]] + + // CHECK: [[MONOTONIC]] + // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 5, label %[[MONOTONIC_SEQCST:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[ACQUIRE]] + // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 5, label %[[ACQUIRE_SEQCST:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[RELEASE]] + // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 5, label %[[RELEASE_SEQCST:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[ACQREL]] + // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 5, label %[[ACQREL_SEQCST:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[SEQCST]] + // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]] + // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[MONOTONIC_MONOTONIC]] + // CHECK: cmpxchg {{.*}} monotonic monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[MONOTONIC_ACQUIRE]] + // CHECK: cmpxchg {{.*}} monotonic acquire, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[MONOTONIC_SEQCST]] + // CHECK: cmpxchg {{.*}} monotonic seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[ACQUIRE_MONOTONIC]] + // CHECK: cmpxchg {{.*}} acquire monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[ACQUIRE_ACQUIRE]] + // CHECK: cmpxchg {{.*}} acquire acquire, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[ACQUIRE_SEQCST]] + // CHECK: cmpxchg {{.*}} acquire seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[RELEASE_MONOTONIC]] + // CHECK: cmpxchg {{.*}} release monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[RELEASE_ACQUIRE]] + // CHECK: cmpxchg {{.*}} release acquire, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[RELEASE_SEQCST]] + // CHECK: cmpxchg {{.*}} release seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[ACQREL_MONOTONIC]] + // CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[ACQREL_ACQUIRE]] + // CHECK: cmpxchg {{.*}} acq_rel acquire, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[ACQREL_SEQCST]] + // CHECK: cmpxchg {{.*}} acq_rel seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[SEQCST_MONOTONIC]] + // CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[SEQCST_ACQUIRE]] + // CHECK: cmpxchg {{.*}} seq_cst acquire, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br + + // CHECK: [[SEQCST_SEQCST]] + // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4, !noalias.addrspace [[$NOPRIVATE]]{{$}} + // CHECK: br } -// CHECK-LABEL: define dso_local i32 @test_volatile( -// CHECK-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// CHECK-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// CHECK-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr -// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load atomic volatile i32, ptr [[TMP0]] syncscope("workgroup") seq_cst, align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 -// CHECK-NEXT: ret i32 [[TMP2]] -// int test_volatile(volatile atomic_int *i) { + // CHECK-LABEL: @test_volatile + // CHECK: %[[i_addr:.*]] = alloca ptr + // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32 + // CHECK-NEXT: %[[retval_ascast:.*]] = addrspacecast ptr addrspace(5) {{.*}} to ptr + // CHECK-NEXT: %[[i_addr_ascast:.*]] = addrspacecast ptr addrspace(5) %[[i_addr]] to ptr + // CHECK-NEXT: %[[atomicdst_ascast:.*]] = addrspacecast ptr addrspace(5) %[[atomicdst]] to ptr + // CHECK-NEXT: store ptr %i, ptr %[[i_addr_ascast]] + // CHECK-NEXT: %[[addr:.*]] = load ptr, ptr %[[i_addr_ascast]] + // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, ptr %[[addr]] syncscope("workgroup") seq_cst, align 4{{$}} + // CHECK-NEXT: store i32 %[[res]], ptr %[[atomicdst_ascast]] + // CHECK-NEXT: %[[retval:.*]] = load i32, ptr %[[atomicdst_ascast]] + // CHECK-NEXT: ret i32 %[[retval]] return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); } From 85bba76a1af0136132fce3a06796f0a287476a82 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 22 Oct 2024 00:19:13 +0100 Subject: [PATCH 7/7] Re-add missing `CHECK`. --- clang/test/CodeGenOpenCL/atomic-ops.cl | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl index 6c0cb3487fb18..1d850261e5e81 100644 --- a/clang/test/CodeGenOpenCL/atomic-ops.cl +++ b/clang/test/CodeGenOpenCL/atomic-ops.cl @@ -43,6 +43,7 @@ void fi1(atomic_int *i) { // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4{{$}} x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); + // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} seq_cst, align 4{{$}} x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); // CHECK: load atomic i32, ptr %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4{{$}}