Skip to content

Commit 952726c

Browse files
author
Ralph Castain
committed
Update to latest PMIx master - equivalent to 2.0rc2. Update the thread support in the opal/pmix framework to protect the framework-level structures.
This now passes the loop test, and so we believe it resolves the random hangs in finalize. Changes in PMIx master that are included here: * Fixed a bug in the PMIx_Get logic * Fixed self-notification procedure * Made pmix_output functions thread safe * Fixed a number of thread safety issues * Updated configury to use 'uname -n' when hostname is unavailable Work on cleaning up the event handler thread safety problem Rarely used functions, but protect them anyway Fix the last part of the intercomm problem Ensure we don't cover any PMIx calls with the framework-level lock. Protect against NULL argv comm_spawn Signed-off-by: Ralph Castain <[email protected]>
1 parent 70107b3 commit 952726c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+2436
-1649
lines changed

ompi/interlib/interlib.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,9 @@ int ompi_interlib_declare(int threadlevel, char *version)
155155
}
156156
opal_list_append(&info, &kv->super);
157157
/* call pmix to initialize these values */
158-
if (OPAL_SUCCESS != (ret = opal_pmix.init(&info))) {
159-
OPAL_LIST_DESTRUCT(&info);
160-
return ret;
161-
}
158+
ret = opal_pmix.init(&info);
162159
OPAL_LIST_DESTRUCT(&info);
163-
return OMPI_SUCCESS;
160+
/* account for our refcount on pmix_init */
161+
opal_pmix.finalize();
162+
return ret;
164163
}

ompi/runtime/ompi_mpi_finalize.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,6 @@ int ompi_mpi_finalize(void)
277277
}
278278
}
279279

280-
/* account for our refcount on pmix_init */
281-
opal_pmix.finalize();
282-
283280
/* check for timing request - get stop time and report elapsed
284281
time if so */
285282
//OPAL_TIMING_DELTAS(ompi_enable_timing, &tm);

opal/mca/pmix/base/base.h

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
#include "opal_config.h"
1616
#include "opal/types.h"
17-
17+
#include "opal/threads/threads.h"
1818
#include "opal/mca/mca.h"
1919
#include "opal/mca/base/mca_base_framework.h"
2020

@@ -55,13 +55,133 @@ OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info,
5555

5656
OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase);
5757

58+
#define opal_pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread)
59+
typedef pthread_cond_t opal_pmix_condition_t;
60+
#define opal_pmix_condition_broadcast(a) pthread_cond_broadcast(a)
61+
#define opal_pmix_condition_signal(a) pthread_cond_signal(a)
62+
#define OPAL_PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER
63+
64+
typedef struct {
65+
opal_mutex_t mutex;
66+
opal_pmix_condition_t cond;
67+
volatile bool active;
68+
} opal_pmix_lock_t;
69+
70+
5871
typedef struct {
5972
opal_event_base_t *evbase;
6073
int timeout;
74+
int initialized;
75+
opal_pmix_lock_t lock;
6176
} opal_pmix_base_t;
6277

6378
extern opal_pmix_base_t opal_pmix_base;
6479

80+
#define OPAL_PMIX_CONSTRUCT_LOCK(l) \
81+
do { \
82+
OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \
83+
pthread_cond_init(&(l)->cond, NULL); \
84+
(l)->active = true; \
85+
} while(0)
86+
87+
#define OPAL_PMIX_DESTRUCT_LOCK(l) \
88+
do { \
89+
OBJ_DESTRUCT(&(l)->mutex); \
90+
pthread_cond_destroy(&(l)->cond); \
91+
} while(0)
92+
93+
94+
#if OPAL_ENABLE_DEBUG
95+
#define OPAL_PMIX_ACQUIRE_THREAD(lck) \
96+
do { \
97+
opal_mutex_lock(&(lck)->mutex); \
98+
if (opal_debug_threads) { \
99+
opal_output(0, "Waiting for thread %s:%d", \
100+
__FILE__, __LINE__); \
101+
} \
102+
while ((lck)->active) { \
103+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
104+
} \
105+
if (opal_debug_threads) { \
106+
opal_output(0, "Thread obtained %s:%d", \
107+
__FILE__, __LINE__); \
108+
} \
109+
(lck)->active = true; \
110+
} while(0)
111+
#else
112+
#define OPAL_PMIX_ACQUIRE_THREAD(lck) \
113+
do { \
114+
opal_mutex_lock(&(lck)->mutex); \
115+
while ((lck)->active) { \
116+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
117+
} \
118+
(lck)->active = true; \
119+
} while(0)
120+
#endif
121+
122+
123+
#if OPAL_ENABLE_DEBUG
124+
#define OPAL_PMIX_WAIT_THREAD(lck) \
125+
do { \
126+
opal_mutex_lock(&(lck)->mutex); \
127+
if (opal_debug_threads) { \
128+
opal_output(0, "Waiting for thread %s:%d", \
129+
__FILE__, __LINE__); \
130+
} \
131+
while ((lck)->active) { \
132+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
133+
} \
134+
if (opal_debug_threads) { \
135+
opal_output(0, "Thread obtained %s:%d", \
136+
__FILE__, __LINE__); \
137+
} \
138+
OPAL_ACQUIRE_OBJECT(&lck); \
139+
opal_mutex_unlock(&(lck)->mutex); \
140+
} while(0)
141+
#else
142+
#define OPAL_PMIX_WAIT_THREAD(lck) \
143+
do { \
144+
opal_mutex_lock(&(lck)->mutex); \
145+
while ((lck)->active) { \
146+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
147+
} \
148+
OPAL_ACQUIRE_OBJECT(lck); \
149+
opal_mutex_unlock(&(lck)->mutex); \
150+
} while(0)
151+
#endif
152+
153+
154+
#if OPAL_ENABLE_DEBUG
155+
#define OPAL_PMIX_RELEASE_THREAD(lck) \
156+
do { \
157+
if (opal_debug_threads) { \
158+
opal_output(0, "Releasing thread %s:%d", \
159+
__FILE__, __LINE__); \
160+
} \
161+
(lck)->active = false; \
162+
opal_pmix_condition_broadcast(&(lck)->cond); \
163+
opal_mutex_unlock(&(lck)->mutex); \
164+
} while(0)
165+
#else
166+
#define OPAL_PMIX_RELEASE_THREAD(lck) \
167+
do { \
168+
assert(0 != opal_mutex_trylock(&(lck)->mutex)); \
169+
(lck)->active = false; \
170+
opal_pmix_condition_broadcast(&(lck)->cond); \
171+
opal_mutex_unlock(&(lck)->mutex); \
172+
} while(0)
173+
#endif
174+
175+
176+
#define OPAL_PMIX_WAKEUP_THREAD(lck) \
177+
do { \
178+
opal_mutex_lock(&(lck)->mutex); \
179+
(lck)->active = false; \
180+
OPAL_POST_OBJECT(lck); \
181+
opal_pmix_condition_broadcast(&(lck)->cond); \
182+
opal_mutex_unlock(&(lck)->mutex); \
183+
} while(0)
184+
65185
END_C_DECLS
66186

67187
#endif

opal/mca/pmix/base/pmix_base_fns.c

Lines changed: 11 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -92,39 +92,6 @@ int opal_pmix_base_notify_event(int status,
9292
return OPAL_SUCCESS;
9393
}
9494

95-
struct lookup_caddy_t {
96-
volatile bool active;
97-
int status;
98-
opal_pmix_pdata_t *pdat;
99-
};
100-
101-
/******** DATA EXCHANGE ********/
102-
static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
103-
{
104-
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
105-
cd->status = status;
106-
if (OPAL_SUCCESS == status && NULL != data) {
107-
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
108-
if (NULL != p) {
109-
cd->pdat->proc = p->proc;
110-
if (p->value.type == cd->pdat->value.type) {
111-
if (NULL != cd->pdat->value.key) {
112-
free(cd->pdat->value.key);
113-
}
114-
(void)opal_value_xfer(&cd->pdat->value, &p->value);
115-
}
116-
}
117-
}
118-
cd->active = false;
119-
}
120-
121-
static void opcbfunc(int status, void *cbdata)
122-
{
123-
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
124-
cd->status = status;
125-
cd->active = false;
126-
}
127-
12895
int opal_pmix_base_exchange(opal_value_t *indat,
12996
opal_pmix_pdata_t *outdat,
13097
int timeout)
@@ -133,8 +100,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
133100
opal_list_t ilist, mlist;
134101
opal_value_t *info;
135102
opal_pmix_pdata_t *pdat;
136-
struct lookup_caddy_t caddy;
137-
char **keys;
138103

139104
/* protect the incoming value */
140105
opal_dss.copy((void**)&info, indat, OPAL_VALUE);
@@ -148,29 +113,10 @@ int opal_pmix_base_exchange(opal_value_t *indat,
148113
opal_list_append(&ilist, &info->super);
149114

150115
/* publish it with "session" scope */
151-
if (NULL == opal_pmix.publish_nb) {
152-
rc = opal_pmix.publish(&ilist);
153-
OPAL_LIST_DESTRUCT(&ilist);
154-
if (OPAL_SUCCESS != rc) {
155-
return rc;
156-
}
157-
} else {
158-
caddy.status = -1;
159-
caddy.active = true;
160-
caddy.pdat = NULL;
161-
rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy);
162-
if (OPAL_SUCCESS != rc) {
163-
OPAL_LIST_DESTRUCT(&ilist);
164-
return rc;
165-
}
166-
while (caddy.active) {
167-
usleep(10);
168-
}
169-
OPAL_LIST_DESTRUCT(&ilist);
170-
if (OPAL_SUCCESS != caddy.status) {
171-
OPAL_ERROR_LOG(caddy.status);
172-
return caddy.status;
173-
}
116+
rc = opal_pmix.publish(&ilist);
117+
OPAL_LIST_DESTRUCT(&ilist);
118+
if (OPAL_SUCCESS != rc) {
119+
return rc;
174120
}
175121

176122
/* lookup the other side's info - if a non-blocking form
@@ -204,43 +150,20 @@ int opal_pmix_base_exchange(opal_value_t *indat,
204150

205151
/* if a non-blocking version of lookup isn't
206152
* available, then use the blocking version */
207-
if (NULL == opal_pmix.lookup_nb) {
208-
OBJ_CONSTRUCT(&ilist, opal_list_t);
209-
opal_list_append(&ilist, &pdat->super);
210-
rc = opal_pmix.lookup(&ilist, &mlist);
211-
OPAL_LIST_DESTRUCT(&mlist);
153+
OBJ_CONSTRUCT(&ilist, opal_list_t);
154+
opal_list_append(&ilist, &pdat->super);
155+
rc = opal_pmix.lookup(&ilist, &mlist);
156+
OPAL_LIST_DESTRUCT(&mlist);
157+
if (OPAL_SUCCESS != rc) {
212158
OPAL_LIST_DESTRUCT(&ilist);
213-
if (OPAL_SUCCESS != rc) {
214-
return rc;
215-
}
216-
} else {
217-
caddy.status = -1;
218-
caddy.active = true;
219-
caddy.pdat = pdat;
220-
keys = NULL;
221-
opal_argv_append_nosize(&keys, pdat->value.key);
222-
rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy);
223-
if (OPAL_SUCCESS != rc) {
224-
OPAL_LIST_DESTRUCT(&mlist);
225-
opal_argv_free(keys);
226-
return rc;
227-
}
228-
while (caddy.active) {
229-
usleep(10);
230-
}
231-
opal_argv_free(keys);
232-
OPAL_LIST_DESTRUCT(&mlist);
233-
if (OPAL_SUCCESS != caddy.status) {
234-
OPAL_ERROR_LOG(caddy.status);
235-
return caddy.status;
236-
}
159+
return rc;
237160
}
238161

239162
/* pass back the result */
240163
outdat->proc = pdat->proc;
241164
free(outdat->value.key);
242165
rc = opal_value_xfer(&outdat->value, &pdat->value);
243-
OBJ_RELEASE(pdat);
166+
OPAL_LIST_DESTRUCT(&ilist);
244167
return rc;
245168
}
246169

opal/mca/pmix/base/pmix_base_frame.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "opal/constants.h"
1414

1515
#include "opal/mca/mca.h"
16+
#include "opal/threads/thread_usage.h"
1617
#include "opal/util/argv.h"
1718
#include "opal/util/output.h"
1819
#include "opal/mca/base/base.h"
@@ -35,7 +36,16 @@ opal_pmix_base_module_t opal_pmix = { 0 };
3536
bool opal_pmix_collect_all_data = true;
3637
int opal_pmix_verbose_output = -1;
3738
bool opal_pmix_base_async_modex = false;
38-
opal_pmix_base_t opal_pmix_base = {0};
39+
opal_pmix_base_t opal_pmix_base = {
40+
.evbase = NULL,
41+
.timeout = 0,
42+
.initialized = 0,
43+
.lock = {
44+
.mutex = OPAL_MUTEX_STATIC_INIT,
45+
.cond = OPAL_PMIX_CONDITION_STATIC_INIT,
46+
.active = false
47+
}
48+
};
3949

4050
static int opal_pmix_base_frame_register(mca_base_register_flag_t flags)
4151
{

opal/mca/pmix/pmix.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ extern int opal_pmix_base_exchange(opal_value_t *info,
146146
OPAL_NAME_PRINT(*(p)), (s))); \
147147
OBJ_CONSTRUCT(&(_ilist), opal_list_t); \
148148
_info = OBJ_NEW(opal_value_t); \
149-
_info->key = strdup(OPAL_PMIX_OPTIONAL); \
149+
_info->key = strdup(OPAL_PMIX_IMMEDIATE); \
150150
_info->type = OPAL_BOOL; \
151151
_info->data.flag = true; \
152152
opal_list_append(&(_ilist), &(_info)->super); \

opal/mca/pmix/pmix2x/pmix/AUTHORS

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,31 @@ Email Name Affiliation(s)
99
alinask Elena Shipunova Mellanox
1010
annu13 Annapurna Dasari Intel
1111
artpol84 Artem Polyakov Mellanox
12+
ashleypittman Ashley Pittman Intel
1213
dsolt Dave Solt IBM
14+
garlick Jim Garlick LLNL
1315
ggouaillardet Gilles Gouaillardet RIST
1416
hjelmn Nathan Hjelm LANL
1517
igor-ivanov Igor Ivanov Mellanox
1618
jladd-mlnx Joshua Ladd Mellanox
17-
jsquyres Jeff Squyres Cisco, IU
19+
jjhursey Joshua Hursey IBM
20+
jsquyres Jeff Squyres Cisco
21+
karasevb Boris Karasev Mellanox
22+
kawashima-fj Takahiro Kawashima Fujitsu
1823
nkogteva Nadezhda Kogteva Mellanox
19-
rhc54 Ralph Castain LANL, Cisco, Intel
24+
nysal Nysal Jan KA IBM
25+
PHHargrove Paul Hargrove LBNL
26+
rhc54 Ralph Castain Intel
2027
------------------------------- --------------------------- -------------------
2128

2229
Affiliation abbreviations:
2330
--------------------------
2431
Cisco = Cisco Systems, Inc.
32+
Fujitsu = Fujitsu
2533
IBM = International Business Machines, Inc.
2634
Intel = Intel, Inc.
27-
IU = Indiana University
2835
LANL = Los Alamos National Laboratory
36+
LBNL = Lawrence Berkeley National Laboratory
37+
LLNL = Lawrence Livermore National Laboratory
2938
Mellanox = Mellanox
3039
RIST = Research Organization for Information Science and Technology

0 commit comments

Comments
 (0)