Skip to content

Commit ee83f4f

Browse files
committed
[offload][OpenMP] Remove device code for num_threads strict
Due to potential performance issues, this commit temporarily removes device-side support for the 'strict' modifier of the num_threads clause, together with the corresponding message and severity clauses.
1 parent 71389a5 commit ee83f4f

File tree

11 files changed

+157
-6185
lines changed

11 files changed

+157
-6185
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,9 +1210,9 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
12101210
if (!CGF.HaveInsertPoint())
12111211
return;
12121212

1213-
auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, NumThreads,
1214-
NumThreadsModifier, Severity, Message](
1215-
CodeGenFunction &CGF, PrePostActionTy &Action) {
1213+
auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,
1214+
NumThreads](CodeGenFunction &CGF,
1215+
PrePostActionTy &Action) {
12161216
CGBuilderTy &Bld = CGF.Builder;
12171217
llvm::Value *NumThreadsVal = NumThreads;
12181218
llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
@@ -1260,22 +1260,21 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
12601260
NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);
12611261

12621262
assert(IfCondVal && "Expected a value");
1263-
RuntimeFunction FnID = OMPRTL___kmpc_parallel_51;
12641263
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1265-
llvm::SmallVector<llvm::Value *, 10> Args(
1266-
{RTLoc, getThreadID(CGF, Loc), IfCondVal, NumThreadsVal,
1267-
llvm::ConstantInt::get(CGF.Int32Ty, -1), FnPtr, ID,
1268-
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1269-
CGF.VoidPtrPtrTy),
1270-
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())});
1271-
if (NumThreadsModifier == OMPC_NUMTHREADS_strict) {
1272-
FnID = OMPRTL___kmpc_parallel_60;
1273-
Args.append({llvm::ConstantInt::get(CGM.Int32Ty, true),
1274-
emitSeverityClause(Severity),
1275-
emitMessageClause(CGF, Message)});
1276-
}
1277-
CGF.EmitRuntimeCall(
1278-
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
1264+
llvm::Value *Args[] = {
1265+
RTLoc,
1266+
getThreadID(CGF, Loc),
1267+
IfCondVal,
1268+
NumThreadsVal,
1269+
llvm::ConstantInt::get(CGF.Int32Ty, -1),
1270+
FnPtr,
1271+
ID,
1272+
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
1273+
CGF.VoidPtrPtrTy),
1274+
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
1275+
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1276+
CGM.getModule(), OMPRTL___kmpc_parallel_51),
1277+
Args);
12791278
};
12801279

12811280
RegionCodeGenTy RCG(ParallelGen);

clang/lib/CodeGen/CGOpenMPRuntimeGPU.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
165165
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
166166
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
167167
/// clause.
168-
/// If the modifier 'strict' is given:
169-
/// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32
170-
/// global_tid, kmp_int32 num_threads, int severity, const char *message) to
171-
/// generate code for 'num_threads' clause with 'strict' modifier.
172-
/// \param NumThreads An integer value of threads.
173168
void emitNumThreadsClause(
174169
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
175170
OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
@@ -238,11 +233,11 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
238233
/// \param NumThreads The value corresponding to the num_threads clause, if
239234
/// any, or nullptr.
240235
/// \param NumThreadsModifier The modifier of the num_threads clause, if
241-
/// any, ignored otherwise.
236+
/// any, ignored otherwise. Currently unused on the device.
242237
/// \param Severity The severity corresponding to the num_threads clause, if
243-
/// any, ignored otherwise.
238+
/// any, ignored otherwise. Currently unused on the device.
244239
/// \param Message The message string corresponding to the num_threads clause,
245-
/// if any, or nullptr.
240+
/// if any, or nullptr. Currently unused on the device.
246241
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
247242
llvm::Function *OutlinedFn,
248243
ArrayRef<llvm::Value *> CapturedVars,

clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp

Lines changed: 0 additions & 1095 deletions
This file was deleted.

clang/test/OpenMP/nvptx_target_codegen.cpp

Lines changed: 66 additions & 847 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp

Lines changed: 35 additions & 725 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp

Lines changed: 18 additions & 418 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp

Lines changed: 0 additions & 2956 deletions
This file was deleted.

llvm/include/llvm/Frontend/OpenMP/OMP.td

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2061,11 +2061,9 @@ def OMP_TargetParallel : Directive<[Spelling<"target parallel">]> {
20612061
let allowedOnceClauses = [
20622062
VersionedClause<OMPC_DefaultMap>,
20632063
VersionedClause<OMPC_Device>,
2064-
VersionedClause<OMPC_Message, 60>,
20652064
VersionedClause<OMPC_NumThreads>,
20662065
VersionedClause<OMPC_OMPX_DynCGroupMem>,
20672066
VersionedClause<OMPC_ProcBind>,
2068-
VersionedClause<OMPC_Severity, 60>,
20692067
VersionedClause<OMPC_ThreadLimit, 51>,
20702068
];
20712069
let leafConstructs = [OMP_Target, OMP_Parallel];
@@ -2093,14 +2091,12 @@ def OMP_TargetParallelDo : Directive<[Spelling<"target parallel do">]> {
20932091
VersionedClause<OMPC_Collapse>,
20942092
VersionedClause<OMPC_DefaultMap>,
20952093
VersionedClause<OMPC_Device>,
2096-
VersionedClause<OMPC_Message, 60>,
20972094
VersionedClause<OMPC_NoWait>,
20982095
VersionedClause<OMPC_NumThreads>,
20992096
VersionedClause<OMPC_Order, 50>,
21002097
VersionedClause<OMPC_Ordered>,
21012098
VersionedClause<OMPC_ProcBind>,
21022099
VersionedClause<OMPC_Schedule>,
2103-
VersionedClause<OMPC_Severity, 60>,
21042100
];
21052101
let leafConstructs = [OMP_Target, OMP_Parallel, OMP_Do];
21062102
let category = CA_Executable;
@@ -2124,7 +2120,6 @@ def OMP_TargetParallelDoSimd
21242120
VersionedClause<OMPC_LastPrivate>,
21252121
VersionedClause<OMPC_Linear>,
21262122
VersionedClause<OMPC_Map>,
2127-
VersionedClause<OMPC_Message, 60>,
21282123
VersionedClause<OMPC_NonTemporal>,
21292124
VersionedClause<OMPC_NoWait>,
21302125
VersionedClause<OMPC_NumThreads>,
@@ -2135,7 +2130,6 @@ def OMP_TargetParallelDoSimd
21352130
VersionedClause<OMPC_Reduction>,
21362131
VersionedClause<OMPC_SafeLen>,
21372132
VersionedClause<OMPC_Schedule>,
2138-
VersionedClause<OMPC_Severity, 60>,
21392133
VersionedClause<OMPC_Shared>,
21402134
VersionedClause<OMPC_SimdLen>,
21412135
VersionedClause<OMPC_UsesAllocators>,
@@ -2160,7 +2154,6 @@ def OMP_TargetParallelFor : Directive<[Spelling<"target parallel for">]> {
21602154
VersionedClause<OMPC_LastPrivate>,
21612155
VersionedClause<OMPC_Linear>,
21622156
VersionedClause<OMPC_Map>,
2163-
VersionedClause<OMPC_Message, 60>,
21642157
VersionedClause<OMPC_NoWait>,
21652158
VersionedClause<OMPC_NumThreads>,
21662159
VersionedClause<OMPC_OMPX_Attribute>,
@@ -2170,7 +2163,6 @@ def OMP_TargetParallelFor : Directive<[Spelling<"target parallel for">]> {
21702163
VersionedClause<OMPC_ProcBind>,
21712164
VersionedClause<OMPC_Reduction>,
21722165
VersionedClause<OMPC_Schedule>,
2173-
VersionedClause<OMPC_Severity, 60>,
21742166
VersionedClause<OMPC_Shared>,
21752167
VersionedClause<OMPC_UsesAllocators, 50>,
21762168
];
@@ -2200,7 +2192,6 @@ def OMP_TargetParallelForSimd
22002192
VersionedClause<OMPC_LastPrivate>,
22012193
VersionedClause<OMPC_Linear>,
22022194
VersionedClause<OMPC_Map>,
2203-
VersionedClause<OMPC_Message, 60>,
22042195
VersionedClause<OMPC_NonTemporal, 50>,
22052196
VersionedClause<OMPC_NoWait>,
22062197
VersionedClause<OMPC_NumThreads>,
@@ -2212,7 +2203,6 @@ def OMP_TargetParallelForSimd
22122203
VersionedClause<OMPC_Reduction>,
22132204
VersionedClause<OMPC_SafeLen>,
22142205
VersionedClause<OMPC_Schedule>,
2215-
VersionedClause<OMPC_Severity, 60>,
22162206
VersionedClause<OMPC_Shared>,
22172207
VersionedClause<OMPC_SimdLen>,
22182208
VersionedClause<OMPC_UsesAllocators, 50>,
@@ -2248,13 +2238,11 @@ def OMP_target_parallel_loop : Directive<[Spelling<"target parallel loop">]> {
22482238
VersionedClause<OMPC_Collapse>,
22492239
VersionedClause<OMPC_Default>,
22502240
VersionedClause<OMPC_DefaultMap>,
2251-
VersionedClause<OMPC_Message, 60>,
22522241
VersionedClause<OMPC_NoWait>,
22532242
VersionedClause<OMPC_NumThreads>,
22542243
VersionedClause<OMPC_OMPX_DynCGroupMem>,
22552244
VersionedClause<OMPC_Order>,
22562245
VersionedClause<OMPC_ProcBind>,
2257-
VersionedClause<OMPC_Severity, 60>,
22582246
VersionedClause<OMPC_ThreadLimit, 51>,
22592247
];
22602248
let leafConstructs = [OMP_Target, OMP_Parallel, OMP_loop];
@@ -2285,14 +2273,12 @@ def OMP_TargetSimd : Directive<[Spelling<"target simd">]> {
22852273
VersionedClause<OMPC_Collapse>,
22862274
VersionedClause<OMPC_DefaultMap>,
22872275
VersionedClause<OMPC_Device>,
2288-
VersionedClause<OMPC_Message, 60>,
22892276
VersionedClause<OMPC_NumThreads>,
22902277
VersionedClause<OMPC_OMPX_DynCGroupMem>,
22912278
VersionedClause<OMPC_Order, 50>,
22922279
VersionedClause<OMPC_ProcBind>,
22932280
VersionedClause<OMPC_SafeLen>,
22942281
VersionedClause<OMPC_Schedule>,
2295-
VersionedClause<OMPC_Severity, 60>,
22962282
VersionedClause<OMPC_SimdLen>,
22972283
VersionedClause<OMPC_ThreadLimit, 51>,
22982284
];
@@ -2385,14 +2371,12 @@ def OMP_TargetTeamsDistributeParallelDo
23852371
VersionedClause<OMPC_DefaultMap>,
23862372
VersionedClause<OMPC_Device>,
23872373
VersionedClause<OMPC_DistSchedule>,
2388-
VersionedClause<OMPC_Message, 60>,
23892374
VersionedClause<OMPC_NoWait>,
23902375
VersionedClause<OMPC_NumTeams>,
23912376
VersionedClause<OMPC_NumThreads>,
23922377
VersionedClause<OMPC_Order, 50>,
23932378
VersionedClause<OMPC_ProcBind>,
23942379
VersionedClause<OMPC_Schedule>,
2395-
VersionedClause<OMPC_Severity, 60>,
23962380
VersionedClause<OMPC_ThreadLimit>,
23972381
];
23982382
let leafConstructs =
@@ -2426,15 +2410,13 @@ def OMP_TargetTeamsDistributeParallelDoSimd
24262410
VersionedClause<OMPC_DefaultMap>,
24272411
VersionedClause<OMPC_Device>,
24282412
VersionedClause<OMPC_DistSchedule>,
2429-
VersionedClause<OMPC_Message, 60>,
24302413
VersionedClause<OMPC_NoWait>,
24312414
VersionedClause<OMPC_NumTeams>,
24322415
VersionedClause<OMPC_NumThreads>,
24332416
VersionedClause<OMPC_Order, 50>,
24342417
VersionedClause<OMPC_ProcBind>,
24352418
VersionedClause<OMPC_SafeLen>,
24362419
VersionedClause<OMPC_Schedule>,
2437-
VersionedClause<OMPC_Severity, 60>,
24382420
VersionedClause<OMPC_SimdLen>,
24392421
VersionedClause<OMPC_ThreadLimit>,
24402422
];
@@ -2460,7 +2442,6 @@ def OMP_TargetTeamsDistributeParallelFor
24602442
VersionedClause<OMPC_IsDevicePtr>,
24612443
VersionedClause<OMPC_LastPrivate>,
24622444
VersionedClause<OMPC_Map>,
2463-
VersionedClause<OMPC_Message, 60>,
24642445
VersionedClause<OMPC_NoWait>,
24652446
VersionedClause<OMPC_NumTeams>,
24662447
VersionedClause<OMPC_NumThreads>,
@@ -2470,7 +2451,6 @@ def OMP_TargetTeamsDistributeParallelFor
24702451
VersionedClause<OMPC_ProcBind>,
24712452
VersionedClause<OMPC_Reduction>,
24722453
VersionedClause<OMPC_Schedule>,
2473-
VersionedClause<OMPC_Severity, 60>,
24742454
VersionedClause<OMPC_Shared>,
24752455
VersionedClause<OMPC_ThreadLimit>,
24762456
VersionedClause<OMPC_UsesAllocators, 50>,
@@ -2502,7 +2482,6 @@ def OMP_TargetTeamsDistributeParallelForSimd
25022482
VersionedClause<OMPC_LastPrivate>,
25032483
VersionedClause<OMPC_Linear>,
25042484
VersionedClause<OMPC_Map>,
2505-
VersionedClause<OMPC_Message, 60>,
25062485
VersionedClause<OMPC_NonTemporal, 50>,
25072486
VersionedClause<OMPC_NoWait>,
25082487
VersionedClause<OMPC_NumTeams>,
@@ -2514,7 +2493,6 @@ def OMP_TargetTeamsDistributeParallelForSimd
25142493
VersionedClause<OMPC_Reduction>,
25152494
VersionedClause<OMPC_SafeLen>,
25162495
VersionedClause<OMPC_Schedule>,
2517-
VersionedClause<OMPC_Severity, 60>,
25182496
VersionedClause<OMPC_Shared>,
25192497
VersionedClause<OMPC_SimdLen>,
25202498
VersionedClause<OMPC_ThreadLimit>,

llvm/include/llvm/Frontend/OpenMP/OMPKinds.def

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,6 @@ __OMP_RTL(__kmpc_target_deinit, false, Void,)
472472
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
473473
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
474474
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
475-
__OMP_RTL(__kmpc_parallel_60, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
476-
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy, Int32, Int32, Int8Ptr)
477475
__OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
478476
__OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8)
479477
__OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64, Int8)
@@ -1087,10 +1085,6 @@ __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
10871085
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
10881086
AttributeSet(), AttributeSet(), AttributeSet(),
10891087
SizeTyExt))
1090-
__OMP_RTL_ATTRS(__kmpc_parallel_60, AlwaysInlineAttrs, AttributeSet(),
1091-
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
1092-
AttributeSet(), AttributeSet(), AttributeSet(),
1093-
SizeTyExt, SExt, SExt, AttributeSet()))
10941088
__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs,
10951089
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt))
10961090
__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs,

openmp/device/include/DeviceTypes.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,6 @@ struct omp_lock_t {
136136
void *Lock;
137137
};
138138

139-
// see definition in openmp/runtime kmp.h
140-
typedef enum omp_severity_t {
141-
severity_warning = 1,
142-
severity_fatal = 2
143-
} omp_severity_t;
144-
145139
using InterWarpCopyFnTy = void (*)(void *src, int32_t warp_num);
146140
using ShuffleReductFnTy = void (*)(void *rhsData, int16_t lane_id,
147141
int16_t lane_offset, int16_t shortCircuit);

0 commit comments

Comments
 (0)