Skip to content

Commit 0e92beb

Browse files
authored
[Clang][OpenMP] Switch to __kmpc_parallel_60 with strict parameter (#171082)
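This commit switches the GPU parallel-region runtime call from `__kmpc_parallel_51` to `__kmpc_parallel_60` and adds a trailing "strict" argument for the number of threads. The argument is emitted as an `i32` and is currently always 0 (no strict prescriptiveness).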
1 parent 039c834 commit 0e92beb

File tree

63 files changed

+691
-684
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

63 files changed

+691
-684
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,9 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
12911291
else
12921292
NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);
12931293

1294+
// No strict prescriptiveness for the number of threads.
1295+
llvm::Value *StrictNumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, 0);
1296+
12941297
assert(IfCondVal && "Expected a value");
12951298
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
12961299
llvm::Value *Args[] = {
@@ -1303,9 +1306,11 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
13031306
ID,
13041307
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
13051308
CGF.VoidPtrPtrTy),
1306-
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
1309+
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size()),
1310+
StrictNumThreadsVal};
1311+
13071312
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1308-
CGM.getModule(), OMPRTL___kmpc_parallel_51),
1313+
CGM.getModule(), OMPRTL___kmpc_parallel_60),
13091314
Args);
13101315
};
13111316

clang/test/OpenMP/amdgcn_target_device_vla.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ int main() {
327327
// CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 8
328328
// CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
329329
// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
330-
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
330+
// CHECK-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5, i32 0)
331331
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
332332
// CHECK: omp.inner.for.inc:
333333
// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
@@ -701,7 +701,7 @@ int main() {
701701
// CHECK-NEXT: store ptr [[A]], ptr [[TMP27]], align 8
702702
// CHECK-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
703703
// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
704-
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
704+
// CHECK-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 0)
705705
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
706706
// CHECK-NEXT: br label [[FOR_COND:%.*]]
707707
// CHECK: for.cond:
@@ -1070,7 +1070,7 @@ int main() {
10701070
// CHECK-NEXT: store ptr [[A]], ptr [[TMP28]], align 8
10711071
// CHECK-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
10721072
// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
1073-
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
1073+
// CHECK-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 0)
10741074
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
10751075
// CHECK-NEXT: br label [[FOR_COND:%.*]]
10761076
// CHECK: for.cond:

clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ void write_to_aligned_array(int *a, int N) {
158158
// CHECK-AMD-NEXT: store ptr [[TMP19]], ptr [[TMP26]], align 8
159159
// CHECK-AMD-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
160160
// CHECK-AMD-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
161-
// CHECK-AMD-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
161+
// CHECK-AMD-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4, i32 0)
162162
// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
163163
// CHECK-AMD: omp.inner.for.inc:
164164
// CHECK-AMD-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4

clang/test/OpenMP/declare_target_codegen_globalization.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ int maini1() {
4040
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
4141
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
4242
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
43-
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
43+
// CHECK1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1, i32 0)
4444
// CHECK1-NEXT: call void @__kmpc_target_deinit()
4545
// CHECK1-NEXT: ret void
4646
// CHECK1: worker.exit:

clang/test/OpenMP/metadirective_device_arch_codegen.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ int metadirective1() {
4949
// CHECK: %{{[0-9]}} = call{{.*}} i32 @__kmpc_target_init
5050
// CHECK: user_code.entry:
5151
// CHECK: call{{.*}} void @[[METADIRECTIVE]]_omp_outlined
52-
// CHECK-NOT: call{{.*}} void @__kmpc_parallel_51
52+
// CHECK-NOT: call{{.*}} void @__kmpc_parallel_60
5353
// CHECK: ret void
5454

5555

clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ int amdgcn_device_isa_selected() {
2424

2525
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
2626
// CHECK: user_code.entry:
27-
// CHECK: call void @__kmpc_parallel_51
27+
// CHECK: call void @__kmpc_parallel_60
2828
// CHECK-NOT: call i32 @__kmpc_single
2929
// CHECK: ret void
3030

@@ -47,7 +47,7 @@ int amdgcn_device_isa_not_selected() {
4747
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
4848
// CHECK: user_code.entry:
4949
// CHECK: call i32 @__kmpc_single
50-
// CHECK-NOT: call void @__kmpc_parallel_51
50+
// CHECK-NOT: call void @__kmpc_parallel_60
5151
// CHECK: ret void
5252

5353
#endif

0 commit comments

Comments
 (0)