Skip to content

Commit cbbd4db

Browse files
ro-imahesh-attarde
authored andcommitted
[OpenMP][clang] Set num_threads 'strict' to unsupported on GPUs (llvm#160659)
Setting the prescriptiveness of the num_threads clause to 'strict' and having a corresponding check (with message and severity clauses) does not align well with how OpenMP should be handled for GPUs. The num_threads expression may be an arbitrary integer expression which is evaluated on the target, in correspondance to the OpenMP spec. This prevents the check from being done before launching the kernel, especially considering that the num_threads clause is associated with the parallel directive and that there may be multiple parallel directives with different num_threads clauses in a single target region. Acting on the result of the 'strict' check on the GPU would require doing I/O on the GPU, which can introduce performance regressions. Delaying any actions resulting from the 'strict' check and doing them on the host after executing the target region involves additional data copies and is not really semantically correct. For now, the 'strict' modifier for the num_threads clause and its associated message and severity clause are set to be unsupported on GPUs. Targets other than GPUs still support the aforementioned features in the context of an OpenMP target region.
1 parent ce14b3c commit cbbd4db

14 files changed

+8217
-13130
lines changed

clang/include/clang/Basic/DiagnosticCommonKinds.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,12 @@ def err_omp_more_one_clause : Error<
433433
"directive '#pragma omp %0' cannot contain more than one '%1' clause%select{| with '%3' name modifier| with 'source' dependence}2">;
434434
def err_omp_required_clause : Error<
435435
"directive '#pragma omp %0' requires the '%1' clause">;
436+
def warn_omp_gpu_unsupported_clause: Warning<
437+
"clause '%0' is currently not supported on a GPU; clause ignored">,
438+
InGroup<OpenMPClauses>;
439+
def warn_omp_gpu_unsupported_modifier_for_clause: Warning<
440+
"modifier '%0' is currently not supported on a GPU for the '%1' clause; modifier ignored">,
441+
InGroup<OpenMPClauses>;
436442

437443
// Static Analyzer Core
438444
def err_unknown_analyzer_checker_or_package : Error<

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2703,7 +2703,8 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
27032703
}
27042704

27052705
llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2706-
const Expr *Message) {
2706+
const Expr *Message,
2707+
SourceLocation Loc) {
27072708
if (!Message)
27082709
return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
27092710
return CGF.EmitScalarExpr(Message);
@@ -2713,11 +2714,13 @@ llvm::Value *
27132714
CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
27142715
const OMPMessageClause *MessageClause) {
27152716
return emitMessageClause(
2716-
CGF, MessageClause ? MessageClause->getMessageString() : nullptr);
2717+
CGF, MessageClause ? MessageClause->getMessageString() : nullptr,
2718+
MessageClause->getBeginLoc());
27172719
}
27182720

27192721
llvm::Value *
2720-
CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity) {
2722+
CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2723+
SourceLocation Loc) {
27212724
// OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
27222725
// as if sev-level is fatal."
27232726
return llvm::ConstantInt::get(CGM.Int32Ty,
@@ -2727,13 +2730,15 @@ CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity) {
27272730
llvm::Value *
27282731
CGOpenMPRuntime::emitSeverityClause(const OMPSeverityClause *SeverityClause) {
27292732
return emitSeverityClause(SeverityClause ? SeverityClause->getSeverityKind()
2730-
: OMPC_SEVERITY_unknown);
2733+
: OMPC_SEVERITY_unknown,
2734+
SeverityClause->getBeginLoc());
27312735
}
27322736

27332737
void CGOpenMPRuntime::emitNumThreadsClause(
27342738
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
27352739
OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2736-
const Expr *Message) {
2740+
SourceLocation SeverityLoc, const Expr *Message,
2741+
SourceLocation MessageLoc) {
27372742
if (!CGF.HaveInsertPoint())
27382743
return;
27392744
llvm::SmallVector<llvm::Value *, 4> Args(
@@ -2745,8 +2750,8 @@ void CGOpenMPRuntime::emitNumThreadsClause(
27452750
RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
27462751
if (Modifier == OMPC_NUMTHREADS_strict) {
27472752
FnID = OMPRTL___kmpc_push_num_threads_strict;
2748-
Args.push_back(emitSeverityClause(Severity));
2749-
Args.push_back(emitMessageClause(CGF, Message));
2753+
Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2754+
Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
27502755
}
27512756
CGF.EmitRuntimeCall(
27522757
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
@@ -12654,7 +12659,8 @@ llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
1265412659
void CGOpenMPSIMDRuntime::emitNumThreadsClause(
1265512660
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
1265612661
OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
12657-
const Expr *Message) {
12662+
SourceLocation SeverityLoc, const Expr *Message,
12663+
SourceLocation MessageLoc) {
1265812664
llvm_unreachable("Not supported in SIMD-only mode");
1265912665
}
1266012666

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,11 +1049,13 @@ class CGOpenMPRuntime {
10491049
Address UB, Address ST);
10501050

10511051
virtual llvm::Value *emitMessageClause(CodeGenFunction &CGF,
1052-
const Expr *Message);
1052+
const Expr *Message,
1053+
SourceLocation Loc);
10531054
virtual llvm::Value *emitMessageClause(CodeGenFunction &CGF,
10541055
const OMPMessageClause *MessageClause);
10551056

1056-
virtual llvm::Value *emitSeverityClause(OpenMPSeverityClauseKind Severity);
1057+
virtual llvm::Value *emitSeverityClause(OpenMPSeverityClauseKind Severity,
1058+
SourceLocation Loc);
10571059
virtual llvm::Value *
10581060
emitSeverityClause(const OMPSeverityClause *SeverityClause);
10591061

@@ -1069,7 +1071,9 @@ class CGOpenMPRuntime {
10691071
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
10701072
OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
10711073
OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
1072-
const Expr *Message = nullptr);
1074+
SourceLocation SeverityLoc = SourceLocation(),
1075+
const Expr *Message = nullptr,
1076+
SourceLocation MessageLoc = SourceLocation());
10731077

10741078
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
10751079
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
@@ -1956,7 +1960,9 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
19561960
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
19571961
OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
19581962
OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
1959-
const Expr *Message = nullptr) override;
1963+
SourceLocation SeverityLoc = SourceLocation(),
1964+
const Expr *Message = nullptr,
1965+
SourceLocation MessageLoc = SourceLocation()) override;
19601966

19611967
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
19621968
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,10 +899,34 @@ void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
899899
// Nothing to do.
900900
}
901901

902+
llvm::Value *CGOpenMPRuntimeGPU::emitMessageClause(CodeGenFunction &CGF,
903+
const Expr *Message,
904+
SourceLocation Loc) {
905+
CGM.getDiags().Report(Loc, diag::warn_omp_gpu_unsupported_clause)
906+
<< getOpenMPClauseName(OMPC_message);
907+
return nullptr;
908+
}
909+
910+
llvm::Value *
911+
CGOpenMPRuntimeGPU::emitSeverityClause(OpenMPSeverityClauseKind Severity,
912+
SourceLocation Loc) {
913+
CGM.getDiags().Report(Loc, diag::warn_omp_gpu_unsupported_clause)
914+
<< getOpenMPClauseName(OMPC_severity);
915+
return nullptr;
916+
}
917+
902918
void CGOpenMPRuntimeGPU::emitNumThreadsClause(
903919
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
904920
OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
905-
const Expr *Message) {
921+
SourceLocation SeverityLoc, const Expr *Message,
922+
SourceLocation MessageLoc) {
923+
if (Modifier == OMPC_NUMTHREADS_strict) {
924+
CGM.getDiags().Report(Loc,
925+
diag::warn_omp_gpu_unsupported_modifier_for_clause)
926+
<< "strict" << getOpenMPClauseName(OMPC_num_threads);
927+
return;
928+
}
929+
906930
// Nothing to do.
907931
}
908932

clang/lib/CodeGen/CGOpenMPRuntimeGPU.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,14 +162,24 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
162162
llvm::omp::ProcBindKind ProcBind,
163163
SourceLocation Loc) override;
164164

165+
// Currently unsupported on the device.
166+
llvm::Value *emitMessageClause(CodeGenFunction &CGF, const Expr *Message,
167+
SourceLocation Loc) override;
168+
169+
// Currently unsupported on the device.
170+
virtual llvm::Value *emitSeverityClause(OpenMPSeverityClauseKind Severity,
171+
SourceLocation Loc) override;
172+
165173
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
166174
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
167175
/// clause.
168176
void emitNumThreadsClause(
169177
CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
170178
OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
171179
OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
172-
const Expr *Message = nullptr) override;
180+
SourceLocation SeverityLoc = SourceLocation(),
181+
const Expr *Message = nullptr,
182+
SourceLocation MessageLoc = SourceLocation()) override;
173183

174184
/// This function ought to emit, in the general case, a call to
175185
// the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,22 +1622,30 @@ static void emitCommonOMPParallelDirective(
16221622
// if sev-level is fatal."
16231623
OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
16241624
clang::Expr *Message = nullptr;
1625+
SourceLocation SeverityLoc = SourceLocation();
1626+
SourceLocation MessageLoc = SourceLocation();
1627+
16251628
llvm::Function *OutlinedFn =
16261629
CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
16271630
CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
16281631
CodeGen);
1632+
16291633
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
16301634
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
16311635
NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
16321636
/*IgnoreResultAssign=*/true);
16331637
Modifier = NumThreadsClause->getModifier();
1634-
if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>())
1638+
if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>()) {
16351639
Message = MessageClause->getMessageString();
1636-
if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>())
1640+
MessageLoc = MessageClause->getBeginLoc();
1641+
}
1642+
if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>()) {
16371643
Severity = SeverityClause->getSeverityKind();
1644+
SeverityLoc = SeverityClause->getBeginLoc();
1645+
}
16381646
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
16391647
CGF, NumThreads, NumThreadsClause->getBeginLoc(), Modifier, Severity,
1640-
Message);
1648+
SeverityLoc, Message, MessageLoc);
16411649
}
16421650
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
16431651
CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// RUN: %clang_cc1 -DF1 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
2+
// RUN: %clang_cc1 -DF1 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o /dev/null
3+
// RUN: %clang_cc1 -DF2 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
4+
// RUN: %clang_cc1 -DF2 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o /dev/null
5+
// RUN: %clang_cc1 -DF3 -verify -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host-ppc.bc
6+
// RUN: %clang_cc1 -DF3 -DTARGET -verify -fopenmp -fopenmp-version=60 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o /dev/null
7+
8+
#ifndef TARGET
9+
// expected-no-diagnostics
10+
#endif
11+
12+
#ifdef F3
13+
template<typename tx>
14+
tx ftemplate(int n) {
15+
tx a = 0;
16+
17+
#ifdef TARGET
18+
// expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
19+
#endif
20+
#pragma omp parallel num_threads(strict: tx(20)) severity(fatal) message("msg")
21+
{
22+
}
23+
24+
short b = 1;
25+
#ifdef TARGET
26+
// expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
27+
#endif
28+
#pragma omp parallel num_threads(strict: b) severity(warning) message("msg")
29+
{
30+
a += b;
31+
}
32+
33+
return a;
34+
}
35+
#endif
36+
37+
#ifdef F2
38+
static
39+
int fstatic(int n) {
40+
41+
#ifdef TARGET
42+
// expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
43+
#endif
44+
#pragma omp target parallel num_threads(strict: n) message("msg")
45+
{
46+
}
47+
48+
#ifdef TARGET
49+
// expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
50+
#endif
51+
#pragma omp target parallel num_threads(strict: 32+n) severity(warning)
52+
{
53+
}
54+
55+
return n+1;
56+
}
57+
#endif
58+
59+
#ifdef F1
60+
struct S1 {
61+
double a;
62+
63+
int r1(int n){
64+
int b = 1;
65+
66+
#ifdef TARGET
67+
// expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
68+
#endif
69+
#pragma omp parallel num_threads(strict: n-b) severity(warning) message("msg")
70+
{
71+
this->a = (double)b + 1.5;
72+
}
73+
74+
#ifdef TARGET
75+
// expected-warning@+2 {{modifier 'strict' is currently not supported on a GPU for the 'num_threads' clause; modifier ignored}}
76+
#endif
77+
#pragma omp parallel num_threads(strict: 1024) severity(fatal)
78+
{
79+
this->a = 2.5;
80+
}
81+
82+
return (int)a;
83+
}
84+
};
85+
#endif
86+
87+
int bar(int n){
88+
int a = 0;
89+
90+
#ifdef F1
91+
#pragma omp target
92+
{
93+
S1 S;
94+
a += S.r1(n);
95+
}
96+
#endif
97+
98+
#ifdef F2
99+
a += fstatic(n);
100+
#endif
101+
102+
#ifdef F3
103+
#pragma omp target
104+
a += ftemplate<int>(n);
105+
#endif
106+
107+
return a;
108+
}

0 commit comments

Comments
 (0)