Skip to content

Commit 23302a2

Browse files
authored
[offload][OpenMP] Remove device code for num_threads strict (#157893)
Due to potential performance issues, this commit temporarily removes support for the num_threads 'strict' modifier and its corresponding message and severity clauses on the device.
1 parent 3168a62 commit 23302a2

12 files changed

+160
-6188
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,9 +1210,9 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
12101210
if (!CGF.HaveInsertPoint())
12111211
return;
12121212

1213-
auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, NumThreads,
1214-
NumThreadsModifier, Severity, Message](
1215-
CodeGenFunction &CGF, PrePostActionTy &Action) {
1213+
auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,
1214+
NumThreads](CodeGenFunction &CGF,
1215+
PrePostActionTy &Action) {
12161216
CGBuilderTy &Bld = CGF.Builder;
12171217
llvm::Value *NumThreadsVal = NumThreads;
12181218
llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
@@ -1260,22 +1260,21 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
12601260
NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);
12611261

12621262
assert(IfCondVal && "Expected a value");
1263-
RuntimeFunction FnID = OMPRTL___kmpc_parallel_51;
12641263
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1265-
llvm::SmallVector<llvm::Value *, 10> Args(
1266-
{RTLoc, getThreadID(CGF, Loc), IfCondVal, NumThreadsVal,
1267-
llvm::ConstantInt::get(CGF.Int32Ty, -1), FnPtr, ID,
1268-
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1269-
CGF.VoidPtrPtrTy),
1270-
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())});
1271-
if (NumThreadsModifier == OMPC_NUMTHREADS_strict) {
1272-
FnID = OMPRTL___kmpc_parallel_60;
1273-
Args.append({llvm::ConstantInt::get(CGM.Int32Ty, true),
1274-
emitSeverityClause(Severity),
1275-
emitMessageClause(CGF, Message)});
1276-
}
1277-
CGF.EmitRuntimeCall(
1278-
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
1264+
llvm::Value *Args[] = {
1265+
RTLoc,
1266+
getThreadID(CGF, Loc),
1267+
IfCondVal,
1268+
NumThreadsVal,
1269+
llvm::ConstantInt::get(CGF.Int32Ty, -1),
1270+
FnPtr,
1271+
ID,
1272+
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1273+
CGF.VoidPtrPtrTy),
1274+
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
1275+
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1276+
CGM.getModule(), OMPRTL___kmpc_parallel_51),
1277+
Args);
12791278
};
12801279

12811280
RegionCodeGenTy RCG(ParallelGen);

clang/lib/CodeGen/CGOpenMPRuntimeGPU.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
165165
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
166166
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
167167
/// clause.
168-
/// If the modifier 'strict' is given:
169-
/// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32
170-
/// global_tid, kmp_int32 num_threads, int severity, const char *message) to
171-
/// generate code for 'num_threads' clause with 'strict' modifier.
172-
/// \param NumThreads An integer value of threads.
173168
void emitNumThreadsClause(
174169
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
175170
OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
@@ -238,11 +233,11 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
238233
/// \param NumThreads The value corresponding to the num_threads clause, if
239234
/// any, or nullptr.
240235
/// \param NumThreadsModifier The modifier of the num_threads clause, if
241-
/// any, ignored otherwise.
236+
/// any, ignored otherwise. Currently unused on the device.
242237
/// \param Severity The severity corresponding to the num_threads clause, if
243-
/// any, ignored otherwise.
238+
/// any, ignored otherwise. Currently unused on the device.
244239
/// \param Message The message string corresponding to the num_threads clause,
245-
/// if any, or nullptr.
240+
/// if any, or nullptr. Currently unused on the device.
246241
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
247242
llvm::Function *OutlinedFn,
248243
ArrayRef<llvm::Value *> CapturedVars,

clang/test/AST/ByteCode/openmp.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ extern int omp_get_thread_num(void);
1717

1818
int test2() {
1919
int x = 0;
20-
int device_result[N] = {0};
20+
int result[N] = {0};
2121

22-
#pragma omp target parallel loop num_threads(strict: N) severity(warning) message("msg")
22+
#pragma omp parallel loop num_threads(strict: N) severity(warning) message("msg")
2323
for (int i = 0; i < N; i++) {
2424
x = omp_get_thread_num();
25-
device_result[i] = i + x;
25+
result[i] = i + x;
2626
}
2727
}
2828

clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp

Lines changed: 0 additions & 1095 deletions
This file was deleted.

clang/test/OpenMP/nvptx_target_codegen.cpp

Lines changed: 66 additions & 847 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp

Lines changed: 35 additions & 725 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp

Lines changed: 18 additions & 418 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp

Lines changed: 0 additions & 2956 deletions
This file was deleted.

llvm/include/llvm/Frontend/OpenMP/OMP.td

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2064,11 +2064,9 @@ def OMP_TargetParallel : Directive<[Spelling<"target parallel">]> {
20642064
let allowedOnceClauses = [
20652065
VersionedClause<OMPC_DefaultMap>,
20662066
VersionedClause<OMPC_Device>,
2067-
VersionedClause<OMPC_Message, 60>,
20682067
VersionedClause<OMPC_NumThreads>,
20692068
VersionedClause<OMPC_OMPX_DynCGroupMem>,
20702069
VersionedClause<OMPC_ProcBind>,
2071-
VersionedClause<OMPC_Severity, 60>,
20722070
VersionedClause<OMPC_ThreadLimit, 51>,
20732071
];
20742072
let leafConstructs = [OMP_Target, OMP_Parallel];
@@ -2096,14 +2094,12 @@ def OMP_TargetParallelDo : Directive<[Spelling<"target parallel do">]> {
20962094
VersionedClause<OMPC_Collapse>,
20972095
VersionedClause<OMPC_DefaultMap>,
20982096
VersionedClause<OMPC_Device>,
2099-
VersionedClause<OMPC_Message, 60>,
21002097
VersionedClause<OMPC_NoWait>,
21012098
VersionedClause<OMPC_NumThreads>,
21022099
VersionedClause<OMPC_Order, 50>,
21032100
VersionedClause<OMPC_Ordered>,
21042101
VersionedClause<OMPC_ProcBind>,
21052102
VersionedClause<OMPC_Schedule>,
2106-
VersionedClause<OMPC_Severity, 60>,
21072103
];
21082104
let leafConstructs = [OMP_Target, OMP_Parallel, OMP_Do];
21092105
let category = CA_Executable;
@@ -2127,7 +2123,6 @@ def OMP_TargetParallelDoSimd
21272123
VersionedClause<OMPC_LastPrivate>,
21282124
VersionedClause<OMPC_Linear>,
21292125
VersionedClause<OMPC_Map>,
2130-
VersionedClause<OMPC_Message, 60>,
21312126
VersionedClause<OMPC_NonTemporal>,
21322127
VersionedClause<OMPC_NoWait>,
21332128
VersionedClause<OMPC_NumThreads>,
@@ -2138,7 +2133,6 @@ def OMP_TargetParallelDoSimd
21382133
VersionedClause<OMPC_Reduction>,
21392134
VersionedClause<OMPC_SafeLen>,
21402135
VersionedClause<OMPC_Schedule>,
2141-
VersionedClause<OMPC_Severity, 60>,
21422136
VersionedClause<OMPC_Shared>,
21432137
VersionedClause<OMPC_SimdLen>,
21442138
VersionedClause<OMPC_UsesAllocators>,
@@ -2163,7 +2157,6 @@ def OMP_TargetParallelFor : Directive<[Spelling<"target parallel for">]> {
21632157
VersionedClause<OMPC_LastPrivate>,
21642158
VersionedClause<OMPC_Linear>,
21652159
VersionedClause<OMPC_Map>,
2166-
VersionedClause<OMPC_Message, 60>,
21672160
VersionedClause<OMPC_NoWait>,
21682161
VersionedClause<OMPC_NumThreads>,
21692162
VersionedClause<OMPC_OMPX_Attribute>,
@@ -2173,7 +2166,6 @@ def OMP_TargetParallelFor : Directive<[Spelling<"target parallel for">]> {
21732166
VersionedClause<OMPC_ProcBind>,
21742167
VersionedClause<OMPC_Reduction>,
21752168
VersionedClause<OMPC_Schedule>,
2176-
VersionedClause<OMPC_Severity, 60>,
21772169
VersionedClause<OMPC_Shared>,
21782170
VersionedClause<OMPC_UsesAllocators, 50>,
21792171
];
@@ -2203,7 +2195,6 @@ def OMP_TargetParallelForSimd
22032195
VersionedClause<OMPC_LastPrivate>,
22042196
VersionedClause<OMPC_Linear>,
22052197
VersionedClause<OMPC_Map>,
2206-
VersionedClause<OMPC_Message, 60>,
22072198
VersionedClause<OMPC_NonTemporal, 50>,
22082199
VersionedClause<OMPC_NoWait>,
22092200
VersionedClause<OMPC_NumThreads>,
@@ -2215,7 +2206,6 @@ def OMP_TargetParallelForSimd
22152206
VersionedClause<OMPC_Reduction>,
22162207
VersionedClause<OMPC_SafeLen>,
22172208
VersionedClause<OMPC_Schedule>,
2218-
VersionedClause<OMPC_Severity, 60>,
22192209
VersionedClause<OMPC_Shared>,
22202210
VersionedClause<OMPC_SimdLen>,
22212211
VersionedClause<OMPC_UsesAllocators, 50>,
@@ -2251,13 +2241,11 @@ def OMP_target_parallel_loop : Directive<[Spelling<"target parallel loop">]> {
22512241
VersionedClause<OMPC_Collapse>,
22522242
VersionedClause<OMPC_Default>,
22532243
VersionedClause<OMPC_DefaultMap>,
2254-
VersionedClause<OMPC_Message, 60>,
22552244
VersionedClause<OMPC_NoWait>,
22562245
VersionedClause<OMPC_NumThreads>,
22572246
VersionedClause<OMPC_OMPX_DynCGroupMem>,
22582247
VersionedClause<OMPC_Order>,
22592248
VersionedClause<OMPC_ProcBind>,
2260-
VersionedClause<OMPC_Severity, 60>,
22612249
VersionedClause<OMPC_ThreadLimit, 51>,
22622250
];
22632251
let leafConstructs = [OMP_Target, OMP_Parallel, OMP_loop];
@@ -2288,14 +2276,12 @@ def OMP_TargetSimd : Directive<[Spelling<"target simd">]> {
22882276
VersionedClause<OMPC_Collapse>,
22892277
VersionedClause<OMPC_DefaultMap>,
22902278
VersionedClause<OMPC_Device>,
2291-
VersionedClause<OMPC_Message, 60>,
22922279
VersionedClause<OMPC_NumThreads>,
22932280
VersionedClause<OMPC_OMPX_DynCGroupMem>,
22942281
VersionedClause<OMPC_Order, 50>,
22952282
VersionedClause<OMPC_ProcBind>,
22962283
VersionedClause<OMPC_SafeLen>,
22972284
VersionedClause<OMPC_Schedule>,
2298-
VersionedClause<OMPC_Severity, 60>,
22992285
VersionedClause<OMPC_SimdLen>,
23002286
VersionedClause<OMPC_ThreadLimit, 51>,
23012287
];
@@ -2388,14 +2374,12 @@ def OMP_TargetTeamsDistributeParallelDo
23882374
VersionedClause<OMPC_DefaultMap>,
23892375
VersionedClause<OMPC_Device>,
23902376
VersionedClause<OMPC_DistSchedule>,
2391-
VersionedClause<OMPC_Message, 60>,
23922377
VersionedClause<OMPC_NoWait>,
23932378
VersionedClause<OMPC_NumTeams>,
23942379
VersionedClause<OMPC_NumThreads>,
23952380
VersionedClause<OMPC_Order, 50>,
23962381
VersionedClause<OMPC_ProcBind>,
23972382
VersionedClause<OMPC_Schedule>,
2398-
VersionedClause<OMPC_Severity, 60>,
23992383
VersionedClause<OMPC_ThreadLimit>,
24002384
];
24012385
let leafConstructs =
@@ -2429,15 +2413,13 @@ def OMP_TargetTeamsDistributeParallelDoSimd
24292413
VersionedClause<OMPC_DefaultMap>,
24302414
VersionedClause<OMPC_Device>,
24312415
VersionedClause<OMPC_DistSchedule>,
2432-
VersionedClause<OMPC_Message, 60>,
24332416
VersionedClause<OMPC_NoWait>,
24342417
VersionedClause<OMPC_NumTeams>,
24352418
VersionedClause<OMPC_NumThreads>,
24362419
VersionedClause<OMPC_Order, 50>,
24372420
VersionedClause<OMPC_ProcBind>,
24382421
VersionedClause<OMPC_SafeLen>,
24392422
VersionedClause<OMPC_Schedule>,
2440-
VersionedClause<OMPC_Severity, 60>,
24412423
VersionedClause<OMPC_SimdLen>,
24422424
VersionedClause<OMPC_ThreadLimit>,
24432425
];
@@ -2463,7 +2445,6 @@ def OMP_TargetTeamsDistributeParallelFor
24632445
VersionedClause<OMPC_IsDevicePtr>,
24642446
VersionedClause<OMPC_LastPrivate>,
24652447
VersionedClause<OMPC_Map>,
2466-
VersionedClause<OMPC_Message, 60>,
24672448
VersionedClause<OMPC_NoWait>,
24682449
VersionedClause<OMPC_NumTeams>,
24692450
VersionedClause<OMPC_NumThreads>,
@@ -2473,7 +2454,6 @@ def OMP_TargetTeamsDistributeParallelFor
24732454
VersionedClause<OMPC_ProcBind>,
24742455
VersionedClause<OMPC_Reduction>,
24752456
VersionedClause<OMPC_Schedule>,
2476-
VersionedClause<OMPC_Severity, 60>,
24772457
VersionedClause<OMPC_Shared>,
24782458
VersionedClause<OMPC_ThreadLimit>,
24792459
VersionedClause<OMPC_UsesAllocators, 50>,
@@ -2505,7 +2485,6 @@ def OMP_TargetTeamsDistributeParallelForSimd
25052485
VersionedClause<OMPC_LastPrivate>,
25062486
VersionedClause<OMPC_Linear>,
25072487
VersionedClause<OMPC_Map>,
2508-
VersionedClause<OMPC_Message, 60>,
25092488
VersionedClause<OMPC_NonTemporal, 50>,
25102489
VersionedClause<OMPC_NoWait>,
25112490
VersionedClause<OMPC_NumTeams>,
@@ -2517,7 +2496,6 @@ def OMP_TargetTeamsDistributeParallelForSimd
25172496
VersionedClause<OMPC_Reduction>,
25182497
VersionedClause<OMPC_SafeLen>,
25192498
VersionedClause<OMPC_Schedule>,
2520-
VersionedClause<OMPC_Severity, 60>,
25212499
VersionedClause<OMPC_Shared>,
25222500
VersionedClause<OMPC_SimdLen>,
25232501
VersionedClause<OMPC_ThreadLimit>,

llvm/include/llvm/Frontend/OpenMP/OMPKinds.def

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,6 @@ __OMP_RTL(__kmpc_target_deinit, false, Void,)
472472
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
473473
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
474474
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
475-
__OMP_RTL(__kmpc_parallel_60, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
476-
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy, Int32, Int32, Int8Ptr)
477475
__OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
478476
__OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
479477
__OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64, Int8)
@@ -1087,10 +1085,6 @@ __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
10871085
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
10881086
AttributeSet(), AttributeSet(), AttributeSet(),
10891087
SizeTyExt))
1090-
__OMP_RTL_ATTRS(__kmpc_parallel_60, AlwaysInlineAttrs, AttributeSet(),
1091-
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
1092-
AttributeSet(), AttributeSet(), AttributeSet(),
1093-
SizeTyExt, SExt, SExt, AttributeSet()))
10941088
__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs,
10951089
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt))
10961090
__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs,

0 commit comments

Comments
 (0)