Skip to content

Commit dc4eba5

Browse files
dhruvachak authored and carlobertolli committed
[OpenMP] [clang] Prioritize thread_limit clause over command-line option.
Change-Id: I15c68f3815148447e423a4a71c545ea36861f971
1 parent 3aca0db commit dc4eba5

File tree

2 files changed

+65
-10
lines changed

2 files changed

+65
-10
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8295,17 +8295,14 @@ int CodeGenModule::getWorkGroupSizeSPMDHelper(const OMPExecutableDirective &D) {
82958295
clang::Expr::EvalResult Result;
82968296
if (ThreadLimitExpr->EvaluateAsInt(Result, getContext())) {
82978297
int ThreadLimitEval = Result.Val.getInt().getExtValue();
8298-
if (ThreadLimitEval > 0 && ThreadLimitEval < ThreadLimit)
8298+
if (ThreadLimitEval > 0 && ThreadLimitEval <= ThreadLimit) {
82998299
ThreadLimit = ThreadLimitEval;
8300+
// Prioritize value from clause over command-line option.
8301+
WorkGroupSz = ThreadLimit;
8302+
}
83008303
}
83018304
}
83028305

8303-
// If the command line work group size is less than any default or user
8304-
// specified thread limit then it is honored otherwise the thread limit
8305-
// determined above will be used.
8306-
if (WorkGroupSz > ThreadLimit)
8307-
WorkGroupSz = ThreadLimit;
8308-
83098306
// Set the actual number of threads if the user requests a value different
83108307
// than the default. If the value is greater than the currently computed
83118308
// thread limit then cap the number of threads to the thread limit.

openmp/libomptarget/test/offloading/thread_limit.c

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,78 @@
1010
// UNSUPPORTED: x86_64-pc-linux-gnu
1111
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
1212

13+
// clang-format on
1314
int main() {
1415
int n = 1 << 20;
1516
int th = 12;
1617
int te = n / th;
18+
1719
// DEFAULT: 12 (MaxFlatWorkGroupSize:
1820
#pragma omp target
1921
#pragma omp teams loop num_teams(te), thread_limit(th)
2022
for (int i = 0; i < n; i++) {
2123
}
2224

23-
// DEFAULT: 13 (MaxFlatWorkGroupSize:
24-
#pragma omp target
25-
#pragma omp teams distribute parallel for simd num_teams(te), thread_limit(th+1) simdlen(64)
25+
// DEFAULT: 13 (MaxFlatWorkGroupSize:
26+
#pragma omp target
27+
#pragma omp teams distribute parallel for simd num_teams(te), \
28+
thread_limit(th + 1) simdlen(64)
2629
for(int i = 0; i < n; i++) {
2730
}
31+
32+
// DEFAULT: 128 (MaxFlatWorkGroupSize:
33+
#pragma omp target teams distribute parallel for thread_limit(128)
34+
for (int i = 0; i < n; i++) {
35+
}
36+
37+
// DEFAULT: 512 (MaxFlatWorkGroupSize:
38+
#pragma omp target teams distribute parallel for thread_limit(512)
39+
for (int i = 0; i < n; i++) {
40+
}
41+
42+
// DEFAULT: 1024 (MaxFlatWorkGroupSize:
43+
#pragma omp target teams distribute parallel for thread_limit(1024)
44+
for (int i = 0; i < n; i++) {
45+
}
46+
47+
// DEFAULT: 128 (MaxFlatWorkGroupSize:
48+
#pragma omp target teams distribute parallel for num_threads(128)
49+
for (int i = 0; i < n; i++) {
50+
}
51+
52+
// DEFAULT: 512 (MaxFlatWorkGroupSize:
53+
#pragma omp target teams distribute parallel for num_threads(512)
54+
for (int i = 0; i < n; i++) {
55+
}
56+
57+
// DEFAULT: 1024 (MaxFlatWorkGroupSize:
58+
#pragma omp target teams distribute parallel for num_threads(1024)
59+
for (int i = 0; i < n; i++) {
60+
}
61+
62+
// DEFAULT: 64 (MaxFlatWorkGroupSize:
63+
#pragma omp target teams distribute parallel for thread_limit(64) \
64+
num_threads(128)
65+
for (int i = 0; i < n; i++) {
66+
}
67+
68+
// DEFAULT: 64 (MaxFlatWorkGroupSize:
69+
#pragma omp target teams distribute parallel for thread_limit(128) \
70+
num_threads(64)
71+
for (int i = 0; i < n; i++) {
72+
}
73+
74+
// DEFAULT: 512 (MaxFlatWorkGroupSize:
75+
#pragma omp target teams distribute parallel for thread_limit(512) \
76+
num_threads(1024)
77+
for (int i = 0; i < n; i++) {
78+
}
79+
80+
// DEFAULT: 512 (MaxFlatWorkGroupSize:
81+
#pragma omp target teams distribute parallel for thread_limit(1024) \
82+
num_threads(512)
83+
for (int i = 0; i < n; i++) {
84+
}
85+
2886
return 0;
2987
}

0 commit comments

Comments
 (0)