Skip to content

Commit 7fe13ac

Browse files
Ralph Castain authored and jsquyres committed
Create a new opal_async_event_base and have the pmix/native and ORTE level use it. This reduces our thread count by one.
(cherry picked from commit open-mpi/ompi@219c4df)
1 parent 48c22c4 commit 7fe13ac

File tree

5 files changed

+38
-16
lines changed

5 files changed

+38
-16
lines changed

opal/mca/event/base/event_base_frame.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
3-
* Copyright (c) 2014 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
44
* $COPYRIGHT$
55
*
66
* Additional copyrights may follow
@@ -65,6 +65,7 @@ static int opal_event_base_close(void)
6565
* Globals
6666
*/
6767
opal_event_base_t *opal_sync_event_base=NULL;
68+
opal_event_base_t *opal_async_event_base=NULL;
6869

6970
static int opal_event_base_open(mca_base_open_flag_t flags)
7071
{

opal/mca/event/external/external.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ typedef struct event_base opal_event_base_t;
2929
typedef struct event opal_event_t;
3030

3131
OPAL_DECLSPEC extern opal_event_base_t *opal_sync_event_base;
32+
OPAL_DECLSPEC extern opal_event_base_t *opal_async_event_base;
3233

3334
#define OPAL_EV_TIMEOUT EV_TIMEOUT
3435
#define OPAL_EV_READ EV_READ

opal/mca/event/libevent2022/libevent2022.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ typedef struct event_base opal_event_base_t;
6868
typedef struct event opal_event_t;
6969

7070
OPAL_DECLSPEC extern opal_event_base_t *opal_sync_event_base;
71+
OPAL_DECLSPEC extern opal_event_base_t *opal_async_event_base;
7172

7273
#define OPAL_EV_TIMEOUT EV_TIMEOUT
7374
#define OPAL_EV_READ EV_READ

opal/runtime/opal_progress_threads.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
/* create a tracking object for progress threads */
2626
typedef struct {
2727
opal_list_item_t super;
28+
int refcount;
2829
char *name;
2930
opal_event_base_t *ev_base;
3031
volatile bool ev_active;
@@ -36,6 +37,7 @@ typedef struct {
3637
} opal_progress_tracker_t;
3738
static void trkcon(opal_progress_tracker_t *p)
3839
{
40+
p->refcount = 1; // start at one since someone created it
3941
p->name = NULL;
4042
p->ev_base = NULL;
4143
p->ev_active = true;
@@ -97,6 +99,21 @@ opal_event_base_t *opal_start_progress_thread(char *name,
9799
opal_progress_tracker_t *trk;
98100
int rc;
99101

102+
if (!inited) {
103+
OBJ_CONSTRUCT(&tracking, opal_list_t);
104+
inited = true;
105+
}
106+
107+
/* check if we already have this thread */
108+
OPAL_LIST_FOREACH(trk, &tracking, opal_progress_tracker_t) {
109+
if (0 == strcmp(name, trk->name)) {
110+
/* we do, so up the refcount on it */
111+
++trk->refcount;
112+
/* return the existing base */
113+
return trk->ev_base;
114+
}
115+
}
116+
100117
trk = OBJ_NEW(opal_progress_tracker_t);
101118
trk->name = strdup(name);
102119
if (NULL == (trk->ev_base = opal_event_base_create())) {
@@ -136,10 +153,6 @@ opal_event_base_t *opal_start_progress_thread(char *name,
136153
OBJ_RELEASE(trk);
137154
return NULL;
138155
}
139-
if (!inited) {
140-
OBJ_CONSTRUCT(&tracking, opal_list_t);
141-
inited = true;
142-
}
143156
opal_list_append(&tracking, &trk->super);
144157
return trk->ev_base;
145158
}
@@ -166,6 +179,12 @@ void opal_stop_progress_thread(char *name, bool cleanup)
166179
}
167180
return;
168181
}
182+
/* decrement the refcount */
183+
--trk->refcount;
184+
/* if we have reached zero, then it's time to stop it */
185+
if (0 < trk->refcount) {
186+
return;
187+
}
169188
/* mark it as inactive */
170189
trk->ev_active = false;
171190
/* break the event loop - this will cause the loop to exit
@@ -207,6 +226,8 @@ int opal_restart_progress_thread(char *name)
207226
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
208227
return OPAL_ERR_NOT_SUPPORTED;
209228
}
229+
/* up the refcount */
230+
++trk->refcount;
210231
/* ensure the block is set, if requested */
211232
if (0 <= trk->pipe[0] && !trk->block_active) {
212233
opal_event_add(&trk->block, 0);

orte/mca/ess/base/ess_base_std_app.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,9 @@ int orte_ess_base_app_setup(bool db_restrict_local)
107107
opal_proc_local_set(&orte_process_info.super);
108108
}
109109

110-
/* get a separate orte event base */
111-
orte_event_base = opal_start_progress_thread("orte", true);
110+
/* get an async event base - we use the opal_async one so
111+
* we don't startup extra threads if not needed */
112+
orte_event_base = opal_start_progress_thread("opal_async", true);
112113
progress_thread_running = true;
113114

114115
/* open and setup the state machine */
@@ -245,13 +246,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
245246
int orte_ess_base_app_finalize(void)
246247
{
247248

248-
/* release the event base so we stop all potential
249-
* race conditions in the messaging teardown */
250-
if (progress_thread_running) {
251-
opal_stop_progress_thread("orte", false);
252-
progress_thread_running = false;
253-
}
254-
255249
#if OPAL_ENABLE_FT_CR == 1
256250
(void) mca_base_framework_close(&orte_snapc_base_framework);
257251
(void) mca_base_framework_close(&orte_sstore_base_framework);
@@ -266,8 +260,12 @@ int orte_ess_base_app_finalize(void)
266260

267261
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
268262

269-
/* free the event base to cleanup memory */
270-
opal_stop_progress_thread("orte", true);
263+
/* release the event base */
264+
if (progress_thread_running) {
265+
opal_stop_progress_thread("opal_async", true);
266+
progress_thread_running = false;
267+
}
268+
271269
return ORTE_SUCCESS;
272270
}
273271

0 commit comments

Comments
 (0)