Skip to content

Commit a696662

Browse files
kevinsalaro-i
authored andcommitted
[Clang][OpenMP] Switch to __kmpc_parallel_60 with strict parameter (llvm#171082)
This commit switches the `__kmpc_parallel_51` to `__kmpc_parallel_60`, and adds the strict boolean for the number of threads.
1 parent 60d55cb commit a696662

File tree

63 files changed

+1381
-734
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+1381
-734
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,6 +1510,9 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
15101510
else
15111511
NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);
15121512

1513+
// No strict prescriptiveness for the number of threads.
1514+
llvm::Value *StrictNumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, 0);
1515+
15131516
assert(IfCondVal && "Expected a value");
15141517
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
15151518
llvm::Value *Args[] = {
@@ -1522,9 +1525,11 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
15221525
ID,
15231526
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
15241527
CGF.VoidPtrPtrTy),
1525-
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
1528+
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size()),
1529+
StrictNumThreadsVal};
1530+
15261531
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1527-
CGM.getModule(), OMPRTL___kmpc_parallel_51),
1532+
CGM.getModule(), OMPRTL___kmpc_parallel_60),
15281533
Args);
15291534
};
15301535

clang/test/OpenMP/amdgcn_target_device_vla.cpp

Lines changed: 171 additions & 2 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,158 @@ void write_to_aligned_array(int *a, int N) {
5252
// CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
5353
// CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
5454
// CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
55+
// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
56+
// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
57+
// CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
58+
// CHECK-AMD: omp.precond.then:
59+
// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
60+
// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
61+
// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
62+
// CHECK-AMD-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
63+
// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
64+
// CHECK-AMD-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
65+
// CHECK-AMD-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
66+
// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
67+
// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
68+
// CHECK-AMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
69+
// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
70+
// CHECK-AMD-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
71+
// CHECK-AMD-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
72+
// CHECK-AMD: cond.true:
73+
// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
74+
// CHECK-AMD-NEXT: br label [[COND_END:%.*]]
75+
// CHECK-AMD: cond.false:
76+
// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
77+
// CHECK-AMD-NEXT: br label [[COND_END]]
78+
// CHECK-AMD: cond.end:
79+
// CHECK-AMD-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
80+
// CHECK-AMD-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
81+
// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
82+
// CHECK-AMD-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV_ASCAST]], align 4
83+
// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
84+
// CHECK-AMD: omp.inner.for.cond:
85+
// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
86+
// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
87+
// CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], 1
88+
// CHECK-AMD-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP11]], [[ADD]]
89+
// CHECK-AMD-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
90+
// CHECK-AMD: omp.inner.for.body:
91+
// CHECK-AMD-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
92+
// CHECK-AMD-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
93+
// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
94+
// CHECK-AMD-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
95+
// CHECK-AMD-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
96+
// CHECK-AMD-NEXT: store i32 [[TMP17]], ptr [[N_CASTED_ASCAST]], align 4
97+
// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
98+
// CHECK-AMD-NEXT: [[TMP19:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8
99+
// CHECK-AMD-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
100+
// CHECK-AMD-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP14]] to ptr
101+
// CHECK-AMD-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
102+
// CHECK-AMD-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
103+
// CHECK-AMD-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP16]] to ptr
104+
// CHECK-AMD-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
105+
// CHECK-AMD-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
106+
// CHECK-AMD-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP18]] to ptr
107+
// CHECK-AMD-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
108+
// CHECK-AMD-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
109+
// CHECK-AMD-NEXT: store ptr [[TMP19]], ptr [[TMP26]], align 8
110+
// CHECK-AMD-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
111+
// CHECK-AMD-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
112+
// CHECK-AMD-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4, i32 0)
113+
// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
114+
// CHECK-AMD: omp.inner.for.inc:
115+
// CHECK-AMD-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
116+
// CHECK-AMD-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
117+
// CHECK-AMD-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
118+
// CHECK-AMD-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
119+
// CHECK-AMD-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
120+
// CHECK-AMD-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
121+
// CHECK-AMD-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
122+
// CHECK-AMD-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
123+
// CHECK-AMD-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
124+
// CHECK-AMD-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
125+
// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
126+
// CHECK-AMD-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
127+
// CHECK-AMD-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
128+
// CHECK-AMD-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
129+
// CHECK-AMD-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
130+
// CHECK-AMD-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
131+
// CHECK-AMD: cond.true10:
132+
// CHECK-AMD-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
133+
// CHECK-AMD-NEXT: br label [[COND_END12:%.*]]
134+
// CHECK-AMD: cond.false11:
135+
// CHECK-AMD-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
136+
// CHECK-AMD-NEXT: br label [[COND_END12]]
137+
// CHECK-AMD: cond.end12:
138+
// CHECK-AMD-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
139+
// CHECK-AMD-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
140+
// CHECK-AMD-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
141+
// CHECK-AMD-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4
142+
// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND]]
143+
// CHECK-AMD: omp.inner.for.end:
144+
// CHECK-AMD-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
145+
// CHECK-AMD: omp.loop.exit:
146+
// CHECK-AMD-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
147+
// CHECK-AMD-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
148+
// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP41]])
149+
// CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]]
150+
// CHECK-AMD: omp.precond.end:
151+
// CHECK-AMD-NEXT: ret void
152+
//
153+
//
154+
// CHECK-AMD-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined_omp_outlined
155+
// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR1]] {
156+
// CHECK-AMD-NEXT: entry:
157+
// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
158+
// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
159+
// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
160+
// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
161+
// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
162+
// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
163+
// CHECK-AMD-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
164+
// CHECK-AMD-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
165+
// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
166+
// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
167+
// CHECK-AMD-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
168+
// CHECK-AMD-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
169+
// CHECK-AMD-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
170+
// CHECK-AMD-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
171+
// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
172+
// CHECK-AMD-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
173+
// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
174+
// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
175+
// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
176+
// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
177+
// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
178+
// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr
179+
// CHECK-AMD-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
180+
// CHECK-AMD-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
181+
// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
182+
// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
183+
// CHECK-AMD-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
184+
// CHECK-AMD-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
185+
// CHECK-AMD-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
186+
// CHECK-AMD-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
187+
// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
188+
// CHECK-AMD-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
189+
// CHECK-AMD-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
190+
// CHECK-AMD-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
191+
// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
192+
// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
193+
// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
194+
// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8
195+
// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
196+
// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
197+
// CHECK-AMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
198+
// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
199+
// CHECK-AMD-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
200+
// CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
201+
// CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
202+
// CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
203+
// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
204+
// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
205+
// CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
206+
// CHECK-AMD: omp.precond.then:
55207
// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
56208
// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
57209
// CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB_ASCAST]], align 4

clang/test/OpenMP/declare_target_codegen_globalization.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ int maini1() {
4040
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
4141
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
4242
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
43-
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
43+
// CHECK1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1, i32 0)
4444
// CHECK1-NEXT: call void @__kmpc_target_deinit()
4545
// CHECK1-NEXT: ret void
4646
// CHECK1: worker.exit:

clang/test/OpenMP/metadirective_device_arch_codegen.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ int metadirective1() {
4242
// CHECK: entry:
4343
// CHECK: %{{[0-9]}} = call i32 @__kmpc_target_init
4444
// CHECK: user_code.entry:
45-
// CHECK: call void @[[METADIRECTIVE]]_omp_outlined
46-
// CHECK-NOT: call void @__kmpc_parallel_51
45+
// CHECK: call{{.*}} void @[[METADIRECTIVE]]_omp_outlined
46+
// CHECK-NOT: call{{.*}} void @__kmpc_parallel_60
4747
// CHECK: ret void
4848

4949

clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ int amdgcn_device_isa_selected() {
2424

2525
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
2626
// CHECK: user_code.entry:
27-
// CHECK: call void @__kmpc_parallel_51
27+
// CHECK: call void @__kmpc_parallel_60
2828
// CHECK-NOT: call i32 @__kmpc_single
2929
// CHECK: ret void
3030

@@ -47,7 +47,7 @@ int amdgcn_device_isa_not_selected() {
4747
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
4848
// CHECK: user_code.entry:
4949
// CHECK: call i32 @__kmpc_single
50-
// CHECK-NOT: call void @__kmpc_parallel_51
50+
// CHECK-NOT: call void @__kmpc_parallel_60
5151
// CHECK: ret void
5252

5353
#endif

0 commit comments

Comments
 (0)