Skip to content

Commit ea34d95

Browse files
author
Peyton, Jonathan L
committed
[OpenMP] Introduce GOMP teams support in runtime
Implement GOMP_teams_reg() function which enables GOMP support of the standalone teams construct. The GOMP_parallel* functions were modified to call __kmp_fork_call() unconditionally so that the teams-specific code could be reused within __kmp_fork_call() instead of reproduced inside the GOMP_* functions. Differential Revision: https://reviews.llvm.org/D87167
1 parent 00ee52a commit ea34d95

File tree

4 files changed

+134
-106
lines changed

4 files changed

+134
-106
lines changed

openmp/runtime/src/kmp_ftn_os.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,5 +679,6 @@
679679
GOMP_parallel_loop_nonmonotonic_runtime
680680
#define KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME \
681681
GOMP_parallel_loop_maybe_nonmonotonic_runtime
682+
#define KMP_API_NAME_GOMP_TEAMS_REG GOMP_teams_reg
682683

683684
#endif /* KMP_FTN_OS_H */

openmp/runtime/src/kmp_gsupport.cpp

Lines changed: 61 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,9 @@ static
361361
#endif
362362
}
363363

364-
#ifndef KMP_DEBUG
365-
static
366-
#endif /* KMP_DEBUG */
367-
void
368-
__kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *),
369-
microtask_t wrapper, int argc, ...) {
364+
static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads,
365+
unsigned flags, void (*unwrapped_task)(void *),
366+
microtask_t wrapper, int argc, ...) {
370367
int rc;
371368
kmp_info_t *thr = __kmp_threads[gtid];
372369
kmp_team_t *team = thr->th.th_team;
@@ -375,6 +372,10 @@ static
375372
va_list ap;
376373
va_start(ap, argc);
377374

375+
if (num_threads != 0)
376+
__kmp_push_num_threads(loc, gtid, num_threads);
377+
if (flags != 0)
378+
__kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags);
378379
rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
379380
__kmp_invoke_task_func, kmp_va_addr_of(ap));
380381

@@ -403,14 +404,6 @@ static
403404
#endif
404405
}
405406

406-
static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid,
407-
void (*task)(void *)) {
408-
#if OMPT_SUPPORT
409-
OMPT_STORE_RETURN_ADDRESS(gtid);
410-
#endif
411-
__kmp_serialized_parallel(loc, gtid);
412-
}
413-
414407
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
415408
void *data,
416409
unsigned num_threads) {
@@ -428,18 +421,9 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
428421

429422
MKLOC(loc, "GOMP_parallel_start");
430423
KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
431-
432-
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
433-
if (num_threads != 0) {
434-
__kmp_push_num_threads(&loc, gtid, num_threads);
435-
}
436-
__kmp_GOMP_fork_call(&loc, gtid, task,
437-
(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
438-
data);
439-
} else {
440-
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
441-
}
442-
424+
__kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
425+
(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
426+
data);
443427
#if OMPT_SUPPORT
444428
if (ompt_enabled.enabled) {
445429
__ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
@@ -460,25 +444,22 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
460444
if (!thr->th.th_team->t.t_serialized) {
461445
__kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
462446
thr->th.th_team);
463-
447+
}
464448
#if OMPT_SUPPORT
465-
if (ompt_enabled.enabled) {
466-
// Implicit task is finished here, in the barrier we might schedule
467-
// deferred tasks,
468-
// these don't see the implicit task on the stack
469-
OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
470-
}
449+
if (ompt_enabled.enabled) {
450+
// Implicit task is finished here, in the barrier we might schedule
451+
// deferred tasks,
452+
// these don't see the implicit task on the stack
453+
OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
454+
}
471455
#endif
472456

473-
__kmp_join_call(&loc, gtid
457+
__kmp_join_call(&loc, gtid
474458
#if OMPT_SUPPORT
475-
,
476-
fork_context_gnu
459+
,
460+
fork_context_gnu
477461
#endif
478-
);
479-
} else {
480-
__kmpc_end_serialized_parallel(&loc, gtid);
481-
}
462+
);
482463
}
483464

484465
// Loop worksharing constructs
@@ -1073,19 +1054,11 @@ LOOP_DOACROSS_RUNTIME_START_ULL(
10731054
\
10741055
ompt_pre(); \
10751056
\
1076-
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
1077-
if (num_threads != 0) { \
1078-
__kmp_push_num_threads(&loc, gtid, num_threads); \
1079-
} \
1080-
__kmp_GOMP_fork_call(&loc, gtid, task, \
1081-
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
1082-
9, task, data, num_threads, &loc, (schedule), lb, \
1083-
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
1084-
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \
1085-
} else { \
1086-
__kmp_GOMP_serialized_parallel(&loc, gtid, task); \
1087-
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \
1088-
} \
1057+
__kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, \
1058+
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
1059+
9, task, data, num_threads, &loc, (schedule), lb, \
1060+
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
1061+
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \
10891062
\
10901063
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
10911064
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
@@ -1332,17 +1305,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
13321305
MKLOC(loc, "GOMP_parallel_sections_start");
13331306
KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
13341307

1335-
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
1336-
if (num_threads != 0) {
1337-
__kmp_push_num_threads(&loc, gtid, num_threads);
1338-
}
1339-
__kmp_GOMP_fork_call(&loc, gtid, task,
1340-
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1341-
task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1342-
(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1343-
} else {
1344-
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
1345-
}
1308+
__kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
1309+
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1310+
task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1311+
(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
13461312

13471313
#if OMPT_SUPPORT
13481314
if (ompt_enabled.enabled) {
@@ -1403,19 +1369,9 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
14031369
OMPT_STORE_RETURN_ADDRESS(gtid);
14041370
}
14051371
#endif
1406-
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
1407-
if (num_threads != 0) {
1408-
__kmp_push_num_threads(&loc, gtid, num_threads);
1409-
}
1410-
if (flags != 0) {
1411-
__kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
1412-
}
1413-
__kmp_GOMP_fork_call(&loc, gtid, task,
1414-
(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
1415-
data);
1416-
} else {
1417-
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
1418-
}
1372+
__kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
1373+
(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
1374+
data);
14191375
#if OMPT_SUPPORT
14201376
if (ompt_enabled.enabled) {
14211377
task_info = __ompt_get_task_info_object(0);
@@ -1450,20 +1406,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
14501406
OMPT_STORE_RETURN_ADDRESS(gtid);
14511407
#endif
14521408

1453-
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
1454-
if (num_threads != 0) {
1455-
__kmp_push_num_threads(&loc, gtid, num_threads);
1456-
}
1457-
if (flags != 0) {
1458-
__kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
1459-
}
1460-
__kmp_GOMP_fork_call(&loc, gtid, task,
1461-
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1462-
task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1463-
(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
1464-
} else {
1465-
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
1466-
}
1409+
__kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
1410+
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
1411+
task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
1412+
(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
14671413

14681414
#if OMPT_SUPPORT
14691415
OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -1488,20 +1434,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
14881434
gtid, lb, ub, str, chunk_sz)); \
14891435
\
14901436
ompt_pre(); \
1491-
if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \
1492-
if (num_threads != 0) { \
1493-
__kmp_push_num_threads(&loc, gtid, num_threads); \
1494-
} \
1495-
if (flags != 0) { \
1496-
__kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \
1497-
} \
1498-
__kmp_GOMP_fork_call(&loc, gtid, task, \
1499-
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
1500-
9, task, data, num_threads, &loc, (schedule), lb, \
1501-
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
1502-
} else { \
1503-
__kmp_GOMP_serialized_parallel(&loc, gtid, task); \
1504-
} \
1437+
__kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, \
1438+
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
1439+
9, task, data, num_threads, &loc, (schedule), lb, \
1440+
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
15051441
\
15061442
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
15071443
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
@@ -1856,6 +1792,25 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
18561792
va_end(args);
18571793
}
18581794

1795+
// fn: the function each master thread of new team will call
1796+
// data: argument to fn
1797+
// num_teams, thread_limit: max bounds on respective ICV
1798+
// flags: unused
1799+
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *),
1800+
void *data,
1801+
unsigned num_teams,
1802+
unsigned thread_limit,
1803+
unsigned flags) {
1804+
MKLOC(loc, "GOMP_teams_reg");
1805+
int gtid = __kmp_entry_gtid();
1806+
KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n",
1807+
gtid, num_teams, thread_limit, flags));
1808+
__kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit);
1809+
__kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn,
1810+
data);
1811+
KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid));
1812+
}
1813+
18591814
/* The following sections of code create aliases for the GOMP_* functions, then
18601815
create versioned symbols using the assembler directive .symver. This is only
18611816
pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in
@@ -2027,6 +1982,7 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50,
20271982
"GOMP_5.0");
20281983
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,
20291984
50, "GOMP_5.0");
1985+
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0");
20301986

20311987
#endif // KMP_USE_VERSION_SYMBOLS
20321988

openmp/runtime/src/kmp_runtime.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
15061506
__kmpc_serialized_parallel(loc, gtid);
15071507
KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
15081508

1509+
if (call_context == fork_context_gnu) {
1510+
// AC: need to decrement t_serialized for enquiry functions to work
1511+
// correctly, will restore at join time
1512+
parent_team->t.t_serialized--;
1513+
return TRUE;
1514+
}
1515+
15091516
#if OMPT_SUPPORT
15101517
void *dummy;
15111518
void **exit_frame_p;
@@ -1638,6 +1645,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
16381645
"master_th=%p, gtid=%d\n",
16391646
root, parent_team, master_th, gtid));
16401647

1648+
if (call_context == fork_context_gnu)
1649+
return TRUE;
1650+
16411651
/* Invoke microtask for MASTER thread */
16421652
KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
16431653
parent_team->t.t_id, parent_team->t.t_pkfn));
@@ -2293,7 +2303,11 @@ void __kmp_join_call(ident_t *loc, int gtid
22932303

22942304
#if OMPT_SUPPORT
22952305
void *team_microtask = (void *)team->t.t_pkfn;
2296-
if (ompt_enabled.enabled) {
2306+
// For GOMP interface with serialized parallel, need the
2307+
// __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
2308+
// and end-parallel events.
2309+
if (ompt_enabled.enabled &&
2310+
!(team->t.t_serialized && fork_context == fork_context_gnu)) {
22972311
master_th->th.ompt_thread_info.state = ompt_state_overhead;
22982312
}
22992313
#endif

openmp/runtime/test/teams/teams.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: %libomp-compile-and-run
2+
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
3+
// UNSUPPORTED: icc, clang
4+
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <omp.h>
8+
9+
#define NUM_TEAMS 2
10+
#define NUM_THREADS_PER_TEAM 3
11+
12+
int main(int argc, char** argv) {
13+
#pragma omp teams num_teams(NUM_TEAMS)
14+
{
15+
int i;
16+
int members[NUM_THREADS_PER_TEAM];
17+
// Only an upper bound is guaranteed for number of teams
18+
int nteams = omp_get_num_teams();
19+
if (nteams > NUM_TEAMS) {
20+
fprintf(stderr, "error: too many teams: %d\n", nteams);
21+
exit(1);
22+
}
23+
for (i = 0; i < NUM_THREADS_PER_TEAM; ++i)
24+
members[i] = -1;
25+
#pragma omp parallel num_threads(NUM_THREADS_PER_TEAM) private(i)
26+
{
27+
int tid = omp_get_thread_num();
28+
int team_id = omp_get_team_num();
29+
int nthreads = omp_get_num_threads();
30+
if (nthreads != NUM_THREADS_PER_TEAM) {
31+
fprintf(stderr, "error: detected number of threads (%d) is not %d\n",
32+
nthreads, NUM_THREADS_PER_TEAM);
33+
exit(1);
34+
}
35+
if (tid < 0 || tid >= nthreads) {
36+
fprintf(stderr, "error: thread id is out of range: %d\n", tid);
37+
exit(1);
38+
}
39+
if (team_id < 0 || team_id > omp_get_num_teams()) {
40+
fprintf(stderr, "error: team id is out of range: %d\n", team_id);
41+
exit(1);
42+
}
43+
members[omp_get_thread_num()] = 1;
44+
#pragma omp barrier
45+
#pragma omp single
46+
{
47+
for (i = 0; i < NUM_THREADS_PER_TEAM; ++i) {
48+
if (members[i] != 1) {
49+
fprintf(stderr, "error: worker %d not flagged\n", i);
50+
exit(1);
51+
}
52+
}
53+
}
54+
}
55+
}
56+
return 0;
57+
}

0 commit comments

Comments
 (0)