diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index c01de4a289a69..97e646cdb8f11 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -326,7 +326,8 @@ namespace llvm { bool parseOptionalStackAlignment(unsigned &Alignment); bool parseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma); bool parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, - bool &AteExtraComma); + bool &AteExtraComma, + unsigned DefaultAS = 0); bool parseAllocSizeArguments(unsigned &BaseSizeArg, std::optional<unsigned> &HowManyArg); bool parseVScaleRangeArguments(unsigned &MinValue, unsigned &MaxValue); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index af0422f09bc4f..3365ea57b760d 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2711,7 +2711,8 @@ bool LLParser::parseOptionalCommaAlign(MaybeAlign &Alignment, /// This returns with AteExtraComma set to true if it ate an excess comma at the /// end. bool LLParser::parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, - bool &AteExtraComma) { + bool &AteExtraComma, + unsigned DefaultAS) { AteExtraComma = false; while (EatIfPresent(lltok::comma)) { // Metadata at the end is an early exit. 
@@ -2724,7 +2725,7 @@ bool LLParser::parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, if (Lex.getKind() != lltok::kw_addrspace) return error(Lex.getLoc(), "expected metadata or 'addrspace'"); - if (parseOptionalAddrSpace(AddrSpace)) + if (parseOptionalAddrSpace(AddrSpace, DefaultAS)) return true; } @@ -8354,7 +8355,8 @@ int LLParser::parseAlloc(Instruction *&Inst, PerFunctionState &PFS) { Value *Size = nullptr; LocTy SizeLoc, TyLoc, ASLoc; MaybeAlign Alignment; - unsigned AddrSpace = 0; + unsigned DefaultAS = M->getDataLayout().getAllocaAddrSpace(); + unsigned AddrSpace = DefaultAS; Type *Ty = nullptr; bool IsInAlloca = EatIfPresent(lltok::kw_inalloca); @@ -8371,11 +8373,12 @@ int LLParser::parseAlloc(Instruction *&Inst, PerFunctionState &PFS) { if (Lex.getKind() == lltok::kw_align) { if (parseOptionalAlignment(Alignment)) return true; - if (parseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma)) + if (parseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma, + DefaultAS)) return true; } else if (Lex.getKind() == lltok::kw_addrspace) { ASLoc = Lex.getLoc(); - if (parseOptionalAddrSpace(AddrSpace)) + if (parseOptionalAddrSpace(AddrSpace, DefaultAS)) return true; } else if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; @@ -8386,11 +8389,12 @@ int LLParser::parseAlloc(Instruction *&Inst, PerFunctionState &PFS) { if (Lex.getKind() == lltok::kw_align) { if (parseOptionalAlignment(Alignment)) return true; - if (parseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma)) + if (parseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma, + DefaultAS)) return true; } else if (Lex.getKind() == lltok::kw_addrspace) { ASLoc = Lex.getLoc(); - if (parseOptionalAddrSpace(AddrSpace)) + if (parseOptionalAddrSpace(AddrSpace, DefaultAS)) return true; } else if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index ac8aa0d35ea30..9653cf50e83ea 100644 --- 
a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -4724,7 +4724,11 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } unsigned AddrSpace = AI->getAddressSpace(); - if (AddrSpace != 0) { + unsigned DefaultAllocaAS = + I.getModule()->getDataLayout().getAllocaAddrSpace(); + // Avoid confusion by omitting the addrspace only if it is 0 and that is the + // default. Dropping the "AddrSpace != 0" condition would also be correct. + if (AddrSpace != 0 || AddrSpace != DefaultAllocaAS) { Out << ", addrspace(" << AddrSpace << ')'; } } else if (isa<CastInst>(I)) { diff --git a/llvm/test/Assembler/alloca-addrspace-default.ll b/llvm/test/Assembler/alloca-addrspace-default.ll new file mode 100644 index 0000000000000..0572ed7f5dcd0 --- /dev/null +++ b/llvm/test/Assembler/alloca-addrspace-default.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +target datalayout = "A9" +; CHECK: target datalayout = "A9" + + +; CHECK: %alloca_scalar_no_align = alloca i32, align 4, addrspace(9) +; CHECK-NEXT: %alloca_scalar_align4 = alloca i32, align 4, addrspace(9) +; CHECK-NEXT: %alloca_scalar_no_align_metadata = alloca i32, align 4, addrspace(9), !foo !0 +; CHECK-NEXT: %alloca_scalar_align4_metadata = alloca i32, align 4, addrspace(9), !foo !0 +; CHECK-NEXT: %alloca_inalloca_scalar_no_align = alloca inalloca i32, align 4, addrspace(9) +; CHECK-NEXT: %alloca_inalloca_scalar_align4_metadata = alloca inalloca i32, align 4, addrspace(9), !foo !0 +define void @use_alloca_default() { + %alloca_scalar_no_align = alloca i32 + %alloca_scalar_align4 = alloca i32, align 4 + %alloca_scalar_no_align_metadata = alloca i32, !foo !0 + %alloca_scalar_align4_metadata = alloca i32, align 4, !foo !0 + %alloca_inalloca_scalar_no_align = alloca inalloca i32 + %alloca_inalloca_scalar_align4_metadata = alloca inalloca i32, align 4, !foo !0 + + ret void +} + +; CHECK: %alloca_scalar_no_align = alloca i32, align 4, addrspace(0) +; CHECK-NEXT: %alloca_scalar_align4 = alloca i32, align 
4, addrspace(0) +; CHECK-NEXT: %alloca_scalar_no_align_metadata = alloca i32, align 4, addrspace(0), !foo !0 +; CHECK-NEXT: %alloca_scalar_align4_metadata = alloca i32, align 4, addrspace(0), !foo !0 +; CHECK-NEXT: %alloca_inalloca_scalar_no_align = alloca inalloca i32, align 4, addrspace(0) +; CHECK-NEXT: %alloca_inalloca_scalar_align4_metadata = alloca inalloca i32, align 4, addrspace(0), !foo !0 +define void @use_alloca_nondefault0() { + %alloca_scalar_no_align = alloca i32, addrspace(0) + %alloca_scalar_align4 = alloca i32, align 4, addrspace(0) + %alloca_scalar_no_align_metadata = alloca i32, addrspace(0), !foo !0 + %alloca_scalar_align4_metadata = alloca i32, align 4, addrspace(0), !foo !0 + %alloca_inalloca_scalar_no_align = alloca inalloca i32, addrspace(0) + %alloca_inalloca_scalar_align4_metadata = alloca inalloca i32, align 4, addrspace(0), !foo !0 + + ret void +} + +!0 = !{} diff --git a/llvm/test/Assembler/symbolic-addrspace.ll b/llvm/test/Assembler/symbolic-addrspace.ll index 7cdfb7cce1e93..6d00c58523b9a 100644 --- a/llvm/test/Assembler/symbolic-addrspace.ll +++ b/llvm/test/Assembler/symbolic-addrspace.ll @@ -38,7 +38,7 @@ target datalayout = "A1-G2-P3" define void @foo() { ; ALLOCA-IN-GLOBALS: %alloca = alloca i32, align 4, addrspace(2){{$}} ; ALLOCA-IN-GLOBALS: %alloca2 = alloca i32, align 4, addrspace(1){{$}} - ; ALLOCA-IN-GLOBALS: %alloca3 = alloca i32, align 4{{$}} + ; ALLOCA-IN-GLOBALS: %alloca3 = alloca i32, align 4, addrspace(1){{$}} ; ALLOCA-IN-GLOBALS: %alloca4 = alloca i32, align 4, addrspace(3){{$}} %alloca = alloca i32, addrspace("G") %alloca2 = alloca i32, addrspace("A") diff --git a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll b/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll index 1e72e679e83c0..0b0e4a63d7ade 100644 --- a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll +++ b/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll @@ -10,7 +10,7 @@ declare void @func(ptr) define void @main() { bb: - 
%alloca = alloca i32, align 4 + %alloca = alloca i32, align 4, addrspace(0) call void @func(ptr %alloca) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll b/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll index 1b0c8d66d3ebc..2e73bcc91821d 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll @@ -16,7 +16,7 @@ define amdgpu_kernel void @offloading_kernel() { } define void @call_unknown() { - %1 = alloca ptr, align 8 + %1 = alloca ptr, align 8, addrspace(0) %2 = call i32 %1() ret void } diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll index 9e2cd06d26ea9..e14c6f8e3713a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alloca-as.ll @@ -10,7 +10,7 @@ define i32 @bar(i32 %arg) { ; TUNIT-LABEL: define {{[^@]+}}@bar ; TUNIT-SAME: (i32 [[ARG:%.*]]) { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[STACK:%.*]] = alloca i32, align 4 +; TUNIT-NEXT: [[STACK:%.*]] = alloca i32, align 4, addrspace(0) ; TUNIT-NEXT: store i32 [[ARG]], ptr [[STACK]], align 4 ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[STACK]], align 4 ; TUNIT-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[TMP0]]) @@ -19,13 +19,13 @@ define i32 @bar(i32 %arg) { ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (i32 [[ARG:%.*]]) { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[STACK:%.*]] = alloca i32, align 4 +; CGSCC-NEXT: [[STACK:%.*]] = alloca i32, align 4, addrspace(0) ; CGSCC-NEXT: store i32 [[ARG]], ptr [[STACK]], align 4 ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @foo(i32 [[ARG]]) ; CGSCC-NEXT: ret i32 [[CALL]] ; entry: - %stack = alloca i32 + %stack = alloca i32, addrspace(0) store i32 %arg, ptr %stack %call = call i32 @foo(ptr %stack) ret i32 %call diff --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll 
b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll index 9a2bfac0feb02..33433f2c28964 100644 --- a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll +++ b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll @@ -12,12 +12,12 @@ declare void @use2(ptr, ptr) define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(ptr %a) { ; CHECK-LABEL: @__omp_offloading_802_ea0109_main_l8( ; CHECK-NEXT: .master: -; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 1, addrspace(0) ; CHECK-NEXT: call void @use2(ptr nonnull [[TMP0]], ptr nonnull [[TMP0]]) ; CHECK-NEXT: ret void ; .master: - %0 = alloca i8, i64 8, align 1 + %0 = alloca i8, i64 8, align 1, addrspace(0) store ptr undef, ptr %0, align 8 call void @use2(ptr %0, ptr %0) ret void @@ -28,23 +28,23 @@ define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(ptr %a) { define void @spam(ptr %arg1) { ; CHECK-LABEL: @spam( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca [0 x [30 x %struct.widget]], align 16 +; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca [0 x [30 x %struct.widget]], align 16, addrspace(0) ; CHECK-NEXT: call void @zot(ptr nonnull [[ALLOCA1]]) ; CHECK-NEXT: ret void ; bb: - %alloca = alloca [30 x %struct.widget], i32 0, align 16 + %alloca = alloca [30 x %struct.widget], i32 0, align 16, addrspace(0) call void @zot(ptr %alloca) ret void } define i1 @alloca_addrspace_0_nonnull() { ; CHECK-LABEL: @alloca_addrspace_0_nonnull( -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(0) ; CHECK-NEXT: call void @use(ptr nonnull [[ALLOCA]]) ; CHECK-NEXT: ret i1 true ; - %alloca = alloca i8 + %alloca = alloca i8, addrspace(0) call void @use(ptr %alloca) %cmp = icmp ne ptr %alloca, null ret i1 %cmp diff --git a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll index f084fe38bb226..03f939de99e1a 100644 
--- a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll +++ b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll @@ -153,7 +153,7 @@ define i32 @remove_alloca_ptr_arg(i1 %c, ptr %ptr) { ; CHECK-NEXT: ret i32 [[V]] ; entry: - %alloca = alloca [32 x i8] + %alloca = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 32, i1 false) br i1 %c, label %if, label %join @@ -212,7 +212,7 @@ define i32 @test_memcpy_after_phi(i1 %cond, ptr %ptr) { ; CHECK-NEXT: ret i32 [[V]] ; entry: - %a = alloca [32 x i8] + %a = alloca [32 x i8], addrspace(0) br i1 %cond, label %if, label %join if: @@ -228,7 +228,7 @@ join: define i32 @addrspace_diff_keep_alloca(i1 %cond, ptr %x) { ; CHECK-LABEL: @addrspace_diff_keep_alloca( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1, addrspace(0) ; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 1 dereferenceable(32) [[A]], ptr addrspace(1) noundef align 16 dereferenceable(32) @g2, i64 32, i1 false) ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] ; CHECK: if: @@ -239,7 +239,7 @@ define i32 @addrspace_diff_keep_alloca(i1 %cond, ptr %x) { ; CHECK-NEXT: ret i32 [[V]] ; entry: - %a = alloca [32 x i8] + %a = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false) br i1 %cond, label %if, label %join @@ -255,7 +255,7 @@ join: define i32 @addrspace_diff_keep_alloca_extra_gep(i1 %cond, ptr %x) { ; CHECK-LABEL: @addrspace_diff_keep_alloca_extra_gep( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1, addrspace(0) ; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 1 dereferenceable(32) [[A]], ptr addrspace(1) noundef align 16 dereferenceable(32) @g2, i64 32, i1 false) ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] ; 
CHECK: if: @@ -267,7 +267,7 @@ define i32 @addrspace_diff_keep_alloca_extra_gep(i1 %cond, ptr %x) { ; CHECK-NEXT: ret i32 [[V]] ; entry: - %a = alloca [32 x i8] + %a = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false) %gep = getelementptr i8, ptr %a, i64 4 br i1 %cond, label %if, label %join @@ -293,7 +293,7 @@ define i32 @addrspace_diff_remove_alloca(i1 %cond) { ; CHECK-NEXT: ret i32 [[V]] ; entry: - %a = alloca [32 x i8] + %a = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false) %gep = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 2 br i1 %cond, label %if, label %join @@ -320,7 +320,7 @@ define i32 @phi_loop(i1 %c) { ; CHECK-NEXT: ret i32 [[V]] ; entry: - %alloca = alloca [32 x i8] + %alloca = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 32, i1 false) br label %loop @@ -337,7 +337,7 @@ exit: define i32 @phi_loop_different_addrspace(i1 %c) { ; CHECK-LABEL: @phi_loop_different_addrspace( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 1, addrspace(0) ; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 1 dereferenceable(32) [[ALLOCA]], ptr addrspace(1) noundef align 16 dereferenceable(32) @g2, i64 32, i1 false) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -349,7 +349,7 @@ define i32 @phi_loop_different_addrspace(i1 %c) { ; CHECK-NEXT: ret i32 [[V]] ; entry: - %alloca = alloca [32 x i8] + %alloca = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p1.i64(ptr %alloca, ptr addrspace(1) @g2, i64 32, i1 false) br label %loop @@ -371,7 +371,7 @@ define i8 @select_same_addrspace_remove_alloca(i1 %cond, ptr %p) { ; CHECK-NEXT: ret i8 [[LOAD]] ; entry: - %alloca = alloca [32 x i8] + %alloca = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 
32, i1 false) %ptr = select i1 %cond, ptr %alloca, ptr %p %load = load i8, ptr %ptr @@ -381,14 +381,14 @@ entry: define i8 @select_after_memcpy_keep_alloca(i1 %cond, ptr %p) { ; CHECK-LABEL: @select_after_memcpy_keep_alloca( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 1, addrspace(0) ; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND:%.*]], ptr [[ALLOCA]], ptr [[P:%.*]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PTR]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false) ; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1 ; CHECK-NEXT: ret i8 [[LOAD]] ; entry: - %alloca = alloca [32 x i8] + %alloca = alloca [32 x i8], addrspace(0) %ptr = select i1 %cond, ptr %alloca, ptr %p call void @llvm.memcpy.p0.p0.i64(ptr %ptr, ptr @g1, i64 32, i1 false) %load = load i8, ptr %ptr @@ -418,7 +418,7 @@ define i8 @select_diff_addrspace_remove_alloca(i1 %cond, ptr %p) { ; CHECK-NEXT: ret i8 0 ; entry: - %alloca = alloca [32 x i8] + %alloca = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p1.i64(ptr %alloca, ptr addrspace(1) @g2, i64 32, i1 false) %gep = getelementptr inbounds [32 x i8], ptr %alloca, i32 0, i32 2 %sel = select i1 %cond, ptr %alloca, ptr %gep @@ -435,7 +435,7 @@ define i8 @select_diff_addrspace_remove_alloca_asan(i1 %cond, ptr %p) sanitize_a ; CHECK-NEXT: ret i8 [[LOAD]] ; entry: - %alloca = alloca [32 x i8] + %alloca = alloca [32 x i8], addrspace(0) call void @llvm.memcpy.p0.p1.i64(ptr %alloca, ptr addrspace(1) @g2, i64 32, i1 false) %gep = getelementptr inbounds [32 x i8], ptr %alloca, i32 0, i32 2 %sel = select i1 %cond, ptr %alloca, ptr %gep diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll index 10e521bbfcc10..140de716cb41f 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ 
b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -140,8 +140,8 @@ define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -165,8 +165,8 @@ define weak i32 @__kmpc_target_init(ptr, ptr) { define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @no_parallel_region_in_here() #7 @@ -199,8 +199,8 @@ declare void @__kmpc_target_deinit() define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -219,10 +219,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 @@ -236,8 +236,8 @@ entry: define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -248,10 +248,10 @@ declare void @p0() #4 define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -266,8 +266,8 @@ declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -278,10 +278,10 @@ declare void @p1() #4 define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -292,8 +292,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -312,9 +312,9 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 @@ -329,7 +329,7 @@ entry: define hidden void @simple_state_machine_interprocedural_before() #1 { entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %0 = call i32 @__kmpc_global_thread_num(ptr @2) call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0) ret void @@ -337,8 +337,8 @@ entry: define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -347,10 +347,10 @@ entry: define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -361,7 +361,7 @@ entry: define hidden void @simple_state_machine_interprocedural_after() #1 { entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %0 = call i32 @__kmpc_global_thread_num(ptr @2) call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0) ret void @@ -369,8 +369,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -389,10 +389,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 %0 = load ptr, ptr %.global_tid..addr, align 8 @@ -405,8 +405,8 @@ entry: define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -415,10 +415,10 @@ entry: define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -431,8 +431,8 @@ declare i32 @unknown() #4 define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -441,10 +441,10 @@ entry: define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -455,8 +455,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -475,10 +475,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 %0 = load ptr, ptr %.global_tid..addr, align 8 @@ -491,8 +491,8 @@ entry: define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -501,10 +501,10 @@ entry: define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -515,8 +515,8 @@ entry: define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -525,10 +525,10 @@ entry: define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -539,8 +539,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -559,10 +559,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 @@ -576,8 +576,8 @@ entry: define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -586,10 +586,10 @@ entry: define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -602,8 +602,8 @@ declare void @unknown_pure() #5 define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -612,10 +612,10 @@ entry: define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -626,8 +626,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -646,8 +646,8 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 %call = call i32 @omp_get_thread_num() #7 @@ -657,7 +657,7 @@ entry: define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 { entry: - %a.addr = alloca i32, align 4 + %a.addr = alloca i32, align 4, addrspace(0) store i32 %a, ptr %a.addr, align 4 %0 = load i32, ptr %a.addr, align 4 %cmp = icmp eq i32 %0, 0 @@ -681,8 +681,8 @@ declare i32 @omp_get_thread_num(...) #4 define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -701,8 +701,8 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @weak_callee_empty() #7 @@ -722,8 +722,8 @@ declare void @__kmpc_barrier(ptr, i32) #6 define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -732,10 +732,10 @@ entry: define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -746,8 +746,8 @@ entry: define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -756,10 +756,10 @@ entry: define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -770,7 +770,7 @@ entry: define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 { entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %0 = call i32 @__kmpc_global_thread_num(ptr @2) call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0) ret void @@ -778,8 +778,8 @@ entry: define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -788,10 +788,10 @@ entry: define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -904,8 +904,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -927,8 +927,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] ; AMDGPU-NEXT: ret void @@ -973,8 +973,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1030,9 +1030,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; 
AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -1044,8 +1044,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p0() #[[ATTR11:[0-9]+]] ; AMDGPU-NEXT: ret void ; @@ -1054,10 +1054,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1067,8 +1067,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define 
{{[^@]+}}@__omp_outlined__3 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1077,10 +1077,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1091,8 +1091,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1154,8 +1154,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -1168,7 +1168,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void @@ -1178,7 +1178,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: 
entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void @@ -1188,8 +1188,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1198,10 +1198,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; 
AMDGPU-NEXT: ret void @@ -1211,7 +1211,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU-SAME: () #[[ATTR6]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void @@ -1221,7 +1221,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void @@ -1232,8 +1232,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1291,9 +1291,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -1304,8 +1304,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: 
call void @p0() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1314,10 +1314,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1327,8 +1327,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1337,10 +1337,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1351,8 +1351,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1408,9 +1408,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 
x ptr], align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -1421,8 +1421,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1431,10 +1431,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; 
AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1444,8 +1444,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1454,10 +1454,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1468,8 +1468,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1525,9 +1525,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -1538,8 +1538,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = 
alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1548,10 +1548,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1561,8 +1561,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1571,10 +1571,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define 
{{[^@]+}}@__omp_outlined__14_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1584,8 +1584,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -1602,8 +1602,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] ; AMDGPU-NEXT: ret void @@ -1613,7 +1613,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -1634,7 +1634,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -1656,8 +1656,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) 
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1703,8 +1703,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR9]] ; AMDGPU-NEXT: ret void ; @@ -1720,8 +1720,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1730,10 +1730,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, 
align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1743,8 +1743,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1753,10 +1753,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void 
@__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -1766,7 +1766,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU-SAME: () #[[ATTR6]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void @@ -1776,7 +1776,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void @@ -1786,8 +1786,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-NEXT: ret void ; @@ -1796,10 +1796,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void @@ -2708,8 +2708,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br 
i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2731,8 +2731,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void @@ -2776,8 +2776,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2794,9 +2794,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU-DISABLED-SAME: (ptr noalias 
[[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -2808,8 +2808,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -2818,10 +2818,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -2831,8 +2831,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -2841,10 +2841,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: 
[[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -2854,8 +2854,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2872,8 +2872,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; 
AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -2886,7 +2886,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR6:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void @@ -2896,7 +2896,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void @@ -2906,8 +2906,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define 
{{[^@]+}}@__omp_outlined__5 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -2916,10 +2916,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -2929,7 +2929,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR6]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; 
AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void @@ -2939,7 +2939,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void @@ -2949,8 +2949,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2967,9 +2967,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -2980,8 +2980,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; 
AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -2990,10 +2990,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3003,8 +3003,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3013,10 +3013,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper ; 
AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3026,8 +3026,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3044,9 +3044,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 ; 
AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -3057,8 +3057,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3067,10 +3067,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper ; AMDGPU-DISABLED-SAME: (i16 
zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3080,8 +3080,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3090,10 +3090,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: 
[[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3103,8 +3103,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3121,9 +3121,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x 
ptr], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -3134,8 +3134,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3144,10 +3144,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; 
AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3157,8 +3157,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3167,10 +3167,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; 
AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3180,8 +3180,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3198,8 +3198,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: 
[[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3209,7 +3209,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized ; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -3230,7 +3230,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after ; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -3251,8 +3251,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = 
alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3269,8 +3269,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3286,8 +3286,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3296,10 +3296,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 
[[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3309,8 +3309,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3319,10 +3319,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void @@ -3332,7 +3332,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR6]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void @@ -3342,7 +3342,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void @@ -3352,8 +3352,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3362,10 +3362,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper ; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void diff --git 
a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll index 9576ff6ca6aee..f5322dc779150 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll @@ -141,8 +141,8 @@ define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -164,8 +164,8 @@ declare i32 @__kmpc_target_init(ptr); define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @no_parallel_region_in_here() #7 @@ -198,8 +198,8 @@ declare void @__kmpc_target_deinit() define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -218,10 +218,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 @@ -235,8 +235,8 @@ entry: define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -247,10 +247,10 @@ declare void @p0() #4 define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -265,8 +265,8 @@ declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -277,10 +277,10 @@ declare void @p1() #4 define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -291,8 +291,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -311,9 +311,9 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 @@ -328,7 +328,7 @@ entry: define hidden void @simple_state_machine_interprocedural_before() #1 { entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %0 = call i32 @__kmpc_global_thread_num(ptr @2) call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0) ret void @@ -336,8 +336,8 @@ entry: define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -346,10 +346,10 @@ entry: define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -360,7 +360,7 @@ entry: define hidden void @simple_state_machine_interprocedural_after() #1 { entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %0 = call i32 @__kmpc_global_thread_num(ptr @2) call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0) ret void @@ -368,8 +368,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -388,10 +388,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 %0 = load ptr, ptr %.global_tid..addr, align 8 @@ -404,8 +404,8 @@ entry: define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -414,10 +414,10 @@ entry: define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -430,8 +430,8 @@ declare i32 @unknown() #4 define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -440,10 +440,10 @@ entry: define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -454,8 +454,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -474,10 +474,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 %0 = load ptr, ptr %.global_tid..addr, align 8 @@ -490,8 +490,8 @@ entry: define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -500,10 +500,10 @@ entry: define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -514,8 +514,8 @@ entry: define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -524,10 +524,10 @@ entry: define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -538,8 +538,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -558,10 +558,10 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 - %captured_vars_addrs = alloca [0 x ptr], align 8 - %captured_vars_addrs1 = alloca [0 x ptr], align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) + %captured_vars_addrs1 = alloca [0 x ptr], align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 @@ -575,8 +575,8 @@ entry: define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -585,10 +585,10 @@ entry: define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -601,8 +601,8 @@ declare void @unknown_pure() #5 define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 @@ -611,10 +611,10 @@ entry: define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -625,8 +625,8 @@ entry: define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -645,8 +645,8 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 %call = call i32 @omp_get_thread_num() #7 @@ -656,7 +656,7 @@ entry: define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 { entry: - %a.addr = alloca i32, align 4 + %a.addr = alloca i32, align 4, addrspace(0) store i32 %a, ptr %a.addr, align 4 %0 = load i32, ptr %a.addr, align 4 %cmp = icmp eq i32 %0, 0 @@ -680,8 +680,8 @@ declare i32 @omp_get_thread_num(...) #4 define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 { entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) store i32 0, ptr %.zero.addr, align 4 %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 @@ -700,8 +700,8 @@ worker.exit: ; preds = %entry define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @weak_callee_empty() #7 @@ -721,8 +721,8 @@ declare void @__kmpc_barrier(ptr, i32) #6 define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -731,10 +731,10 @@ entry: define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -745,8 +745,8 @@ entry: define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -755,10 +755,10 @@ entry: define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -769,7 +769,7 @@ entry: define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 { entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %0 = call i32 @__kmpc_global_thread_num(ptr @2) call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0) ret void @@ -777,8 +777,8 @@ entry: define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) 
#0 { entry: - %.global_tid..addr = alloca ptr, align 8 - %.bound_tid..addr = alloca ptr, align 8 + %.global_tid..addr = alloca ptr, align 8, addrspace(0) + %.bound_tid..addr = alloca ptr, align 8, addrspace(0) store ptr %.global_tid., ptr %.global_tid..addr, align 8 store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 @@ -787,10 +787,10 @@ entry: define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 { entry: - %.addr = alloca i16, align 2 - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr = alloca i16, align 2, addrspace(0) + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 0, ptr %.zero.addr, align 4 store i16 %0, ptr %.addr, align 2 store i32 %1, ptr %.addr1, align 4 @@ -913,8 +913,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -931,8 +931,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; 
AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] ; AMDGPU1-NEXT: ret void @@ -976,8 +976,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -994,9 +994,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x 
ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -1008,8 +1008,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p0() #[[ATTR11:[0-9]+]] ; AMDGPU1-NEXT: ret void ; @@ -1018,10 +1018,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1031,8 +1031,8 @@ attributes #9 = { convergent nounwind 
readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1041,10 +1041,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1054,8 +1054,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -1072,8 +1072,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] ; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -1086,7 +1086,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU1-SAME: () #[[ATTR6:[0-9]+]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: ret void @@ -1096,7 +1096,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: 
define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU1-SAME: () #[[ATTR1]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: ret void @@ -1106,8 +1106,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1116,10 +1116,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void 
@__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1129,7 +1129,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU1-SAME: () #[[ATTR6]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: ret void @@ -1139,7 +1139,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU1-SAME: () #[[ATTR1]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: ret void @@ -1149,8 +1149,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; 
AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -1167,9 +1167,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -1180,8 +1180,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1190,10 +1190,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1203,8 +1203,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1213,10 +1213,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) 
#[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1226,8 +1226,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -1244,9 +1244,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x 
ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -1257,8 +1257,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1267,10 +1267,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, 
align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1280,8 +1280,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1290,10 +1290,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1303,8 +1303,8 @@ attributes #9 = { convergent 
nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -1321,9 +1321,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -1334,8 +1334,8 @@ attributes #9 = { 
convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1344,10 +1344,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1357,8 +1357,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, 
addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1367,10 +1367,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1380,8 +1380,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] @@ -1398,8 +1398,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__15 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] ; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] ; AMDGPU1-NEXT: ret void @@ -1409,7 +1409,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized ; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -1430,7 +1430,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after ; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -1451,8 +1451,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) ; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -1469,8 +1469,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__16 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR9]] ; AMDGPU1-NEXT: ret void ; @@ -1486,8 +1486,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU1-NEXT: ret 
void ; @@ -1496,10 +1496,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1509,8 +1509,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1519,10 +1519,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -1532,7 +1532,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU1-SAME: () #[[ATTR6]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: ret void @@ -1542,7 +1542,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU1-SAME: () #[[ATTR1]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU1-NEXT: ret void @@ -1552,8 +1552,8 @@ attributes #9 = 
{ convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19 ; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU1-NEXT: ret void ; @@ -1562,10 +1562,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper ; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU1-NEXT: entry: -; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU1-NEXT: ret void @@ -2237,8 +2237,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; 
AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2255,8 +2255,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] ; AMDGPU2-NEXT: ret void @@ -2300,8 +2300,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2318,9 +2318,9 @@ attributes #9 = { 
convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -2332,8 +2332,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p0() #[[ATTR11:[0-9]+]] ; AMDGPU2-NEXT: ret void ; @@ -2342,10 +2342,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; 
AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2355,8 +2355,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2365,10 +2365,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call 
void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2378,8 +2378,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2396,8 +2396,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] ; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -2410,7 +2410,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define 
{{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU2-SAME: () #[[ATTR6:[0-9]+]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: ret void @@ -2420,7 +2420,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU2-SAME: () #[[ATTR1]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: ret void @@ -2430,8 +2430,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2440,10 +2440,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: 
define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2453,7 +2453,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU2-SAME: () #[[ATTR6]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: ret void @@ -2463,7 +2463,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU2-SAME: () #[[ATTR1]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB2]]) ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: ret void @@ -2473,8 +2473,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2491,9 +2491,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], 
i64 0) ; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -2504,8 +2504,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2514,10 +2514,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2527,8 +2527,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU2-SAME: (ptr noalias 
[[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2537,10 +2537,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2550,8 +2550,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2568,9 +2568,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -2581,8 +2581,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, 
addrspace(0) ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2591,10 +2591,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2604,8 +2604,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2614,10 +2614,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = 
alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2627,8 +2627,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2645,9 +2645,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, 
align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -2658,8 +2658,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2668,10 +2668,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, 
align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2681,8 +2681,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2691,10 +2691,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2704,8 +2704,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2722,8 +2722,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__15 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] ; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] ; AMDGPU2-NEXT: ret void @@ -2733,7 +2733,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized ; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], 
align 4 ; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -2754,7 +2754,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after ; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -2775,8 +2775,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) ; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2793,8 +2793,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__16 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; 
AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR9]] ; AMDGPU2-NEXT: ret void ; @@ -2810,8 +2810,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2820,10 +2820,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2833,8 +2833,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: 
entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2843,10 +2843,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2856,7 +2856,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU2-SAME: () #[[ATTR6]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr 
[[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: ret void @@ -2866,7 +2866,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU2-SAME: () #[[ATTR1]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU2-NEXT: ret void @@ -2876,8 +2876,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19 ; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU2-NEXT: ret void ; @@ -2886,10 +2886,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper ; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU2-NEXT: entry: -; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; 
AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU2-NEXT: ret void @@ -2899,8 +2899,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2917,8 +2917,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] ; AMDGPU3-NEXT: ret void @@ -2962,8 +2962,8 @@ attributes #9 = { convergent nounwind readonly willreturn } 
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -2980,9 +2980,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -2994,8 +2994,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr 
noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p0() #[[ATTR11:[0-9]+]] ; AMDGPU3-NEXT: ret void ; @@ -3004,10 +3004,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3017,8 +3017,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] ; 
AMDGPU3-NEXT: ret void ; @@ -3027,10 +3027,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3040,8 +3040,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3058,8 +3058,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU3-SAME: (ptr 
noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] ; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] @@ -3072,7 +3072,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU3-SAME: () #[[ATTR6:[0-9]+]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: ret void @@ -3082,7 +3082,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU3-SAME: () #[[ATTR1]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 
0) ; AMDGPU3-NEXT: ret void @@ -3092,8 +3092,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3102,10 +3102,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3115,7 +3115,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU3-SAME: () #[[ATTR6]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; 
AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: ret void @@ -3125,7 +3125,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU3-SAME: () #[[ATTR1]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: ret void @@ -3135,8 +3135,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3153,9 +3153,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU3-SAME: (ptr 
noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -3166,8 +3166,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3176,10 +3176,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; 
AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3189,8 +3189,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3199,10 +3199,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call 
void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3212,8 +3212,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3230,9 +3230,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: call void 
@unknown_no_openmp() #[[ATTR10]] ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -3243,8 +3243,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3253,10 +3253,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3266,8 +3266,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr 
noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3276,10 +3276,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3289,8 +3289,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3307,9 +3307,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) @@ -3320,8 +3320,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call 
void @p0() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3330,10 +3330,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3343,8 +3343,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3353,10 +3353,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: 
[[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3366,8 +3366,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3384,8 +3384,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__15 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] ; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] ; AMDGPU3-NEXT: ret void @@ -3395,7 +3395,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized ; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -3416,7 +3416,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after ; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -3437,8 +3437,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr 
@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]]) ; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -3455,8 +3455,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__16 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR9]] ; AMDGPU3-NEXT: ret void ; @@ -3472,8 +3472,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3482,10 +3482,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; 
AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3495,8 +3495,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3505,10 +3505,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr 
[[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void @@ -3518,7 +3518,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU3-SAME: () #[[ATTR6]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: ret void @@ -3528,7 +3528,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU3-SAME: () #[[ATTR1]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU3-NEXT: ret void @@ -3538,8 +3538,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19 ; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) +; AMDGPU3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] ; AMDGPU3-NEXT: ret void ; @@ -3548,10 +3548,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper ; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { ; AMDGPU3-NEXT: entry: -; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(0) +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU3-NEXT: ret void diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 1a629ecfee06d..a1395a2349e01 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -225,8 +225,8 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) 
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -260,8 +260,8 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -310,8 +310,8 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug ; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -392,8 +392,8 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -415,7 +415,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -451,7 +451,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED1: for.cond: ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -469,7 +469,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. 
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED2: for.cond: ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -521,7 +521,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) br label %for.cond for.cond: ; preds = %for.body, %entry @@ -588,9 +588,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void @@ -608,9 +608,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) 
#[[ATTR2:[0-9]+]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void @@ -618,9 +618,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void @@ -646,9 +646,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: ret void ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, 
align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) @@ -661,8 +661,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -696,8 +696,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label 
[[THREAD_USER_CODE_CHECK:%.*]] @@ -746,8 +746,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -828,8 +828,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -852,7 +852,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -893,7 +893,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] @@ -914,7 +914,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED2-NEXT: entry: ; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; 
AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]] ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] @@ -972,7 +972,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) call void @use(ptr nocapture %x) #10 br label %for.cond @@ -1041,9 +1041,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void @@ -1061,9 +1061,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void @@ -1071,9 +1071,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void @@ -1099,9 +1099,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: ret void ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 
store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) @@ -1115,8 +1115,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -1150,8 +1150,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1200,8 +1200,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; 
AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -1282,8 +1282,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1305,7 +1305,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1343,7 +1343,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED1: for.cond: ; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1362,7 +1362,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, 
addrspace(0) ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED2: for.cond: ; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1417,7 +1417,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [1 x ptr], align 8 + %captured_vars_addrs = alloca [1 x ptr], align 8, addrspace(0) %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) br label %for.cond @@ -1508,9 +1508,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] @@ -1532,9 +1532,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) 
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] @@ -1544,9 +1544,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] @@ -1578,9 +1578,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: ret void ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) @@ -1595,8 +1595,8 @@ define weak ptx_kernel void 
@__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -1630,8 +1630,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -1680,8 +1680,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 ; 
AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -1762,8 +1762,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1785,7 +1785,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]] ; AMDGPU: region.check.tid: ; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() @@ -1851,7 +1851,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED1: for.cond: @@ -1871,7 +1871,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6 ; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-DISABLED2-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]] ; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED2: for.cond: @@ -1929,7 +1929,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [1 x ptr], align 8 + %captured_vars_addrs = alloca [1 x ptr], align 8, addrspace(0) %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) store i32 42, ptr %x, align 4, !tbaa !18 br label %for.cond @@ -2021,9 +2021,9 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] @@ -2045,9 +2045,9 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: 
[[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] @@ -2057,9 +2057,9 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] @@ -2091,9 +2091,9 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: ret void ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 
call void @__kmpc_get_shared_variables(ptr %global_args) @@ -2109,8 +2109,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -2200,8 +2200,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -2245,8 +2245,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; AMDGPU-DISABLED2-LABEL: define 
{{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -2320,8 +2320,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -2387,7 +2387,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_ ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -2492,7 +2492,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_ ; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED1-NEXT: entry: ; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -2544,7 +2544,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_ ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; 
AMDGPU-DISABLED2-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) ; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -2628,7 +2628,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_ ; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]] ; entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -2813,9 +2813,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void @@ -2833,9 +2833,9 @@ define internal void @__omp_outlined__9_wrapper(i16 
zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper ; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED1-NEXT: entry: -; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED1-NEXT: ret void @@ -2843,9 +2843,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper ; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-DISABLED2-NEXT: entry: -; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED2-NEXT: ret void @@ -2871,9 +2871,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED2-NEXT: ret void ; entry: - %.addr1 = alloca i32, 
align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll index 953ecb2ddd8a6..a4acf528e59a2 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -42,7 +42,7 @@ target triple = "amdgcn-amd-amdhsa" ; Function Attrs: alwaysinline convergent norecurse nounwind define weak_odr amdgpu_kernel void @__omp_offloading_20_11e3950_main_l12(ptr %dyn, i64 noundef %nxyz, i64 noundef %ng, ptr noundef nonnull align 8 dereferenceable(8) %aa) local_unnamed_addr #0 { entry: - %ng1 = alloca i32, align 4 + %ng1 = alloca i32, align 4, addrspace(0) %captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5) %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_kernel_environment to ptr), ptr %dyn) %exec_user_code = icmp eq i32 %0, -1 diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll index ef8caf48e57b7..95bb97731bfd6 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll @@ -47,8 +47,8 @@ define internal void @spmd_callees__debug(i1 %c) { ; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees__debug ; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -107,8 +107,8 @@ define internal void @spmd_callees__debug(i1 %c) { ; NVPTX-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -133,7 +133,7 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -167,7 +167,7 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) br label %for.cond for.cond: ; preds = %for.body, %entry @@ -214,9 +214,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; 
AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]] ; AMDGPU-NEXT: ret void @@ -232,9 +232,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: ret void ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) @@ -250,7 +250,7 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr ; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR6]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -288,7 +288,7 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [0 x ptr], align 8 + 
%captured_vars_addrs = alloca [0 x ptr], align 8, addrspace(0) %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) call void @use(ptr nocapture %x) #10 br label %for.cond @@ -337,9 +337,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]] ; AMDGPU-NEXT: ret void @@ -355,9 +355,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: ret void ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) @@ -374,8 +374,8 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { ; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca 
i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -492,8 +492,8 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 { ; NVPTX-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -518,7 +518,7 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(0) ; AMDGPU-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: @@ -558,7 +558,7 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [1 x ptr], align 8 + %captured_vars_addrs = alloca [1 x ptr], align 8, addrspace(0) %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) br label %for.cond @@ -617,9 +617,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper ; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) 
#[[ATTR2]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(0) ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]] @@ -639,9 +639,9 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: ret void ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) @@ -658,8 +658,8 @@ define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees_metadata ; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] @@ -692,8 +692,8 @@ define weak ptx_kernel void 
@spmd_callees_metadata(ptr %fp) #0 { ; NVPTX-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. = alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -718,8 +718,8 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(0) +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(0) ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] @@ -834,8 +834,8 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 { ; NVPTX-NEXT: br label [[COMMON_RET]] ; entry: - %.zero.addr = alloca i32, align 4 - %.threadid_temp. = alloca i32, align 4 + %.zero.addr = alloca i32, align 4, addrspace(0) + %.threadid_temp. 
= alloca i32, align 4, addrspace(0) %0 = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1043,9 +1043,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; ; entry: - %.addr1 = alloca i32, align 4 - %.zero.addr = alloca i32, align 4 - %global_args = alloca ptr, align 8 + %.addr1 = alloca i32, align 4, addrspace(0) + %.zero.addr = alloca i32, align 4, addrspace(0) + %global_args = alloca ptr, align 8, addrspace(0) store i32 %1, ptr %.addr1, align 4, !tbaa !18 store i32 0, ptr %.zero.addr, align 4 call void @__kmpc_get_shared_variables(ptr %global_args) diff --git a/llvm/test/Transforms/SafeStack/X86/alloca-addrspace-wrong-addrspace.ll b/llvm/test/Transforms/SafeStack/X86/alloca-addrspace-wrong-addrspace.ll index 4f780fa9695ff..829f51967de6f 100644 --- a/llvm/test/Transforms/SafeStack/X86/alloca-addrspace-wrong-addrspace.ll +++ b/llvm/test/Transforms/SafeStack/X86/alloca-addrspace-wrong-addrspace.ll @@ -18,7 +18,7 @@ define void @correct_alloca_addrspace() nounwind uwtable safestack { ; TLS-NEXT: ret void ; entry: - %a = alloca i8, align 8 + %a = alloca i8, align 8, addrspace(0) call void @Capture(ptr %a) ret void }