Skip to content

Commit 1748440

Browse files
author
Ralph Castain
authored
Merge pull request #3662 from rhc54/topic/pmixupagain
Update to pmix v2.0.0rc1, including thread safety fixes
2 parents 21fba8b + c3e6dc2 commit 1748440

23 files changed

+178
-36
lines changed

opal/mca/pmix/pmix2x/pmix/VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ greek=
3030
# command, or with the date (if "git describe" fails) in the form of
3131
# "date<date>".
3232

33-
repo_rev=git707f8cf
33+
repo_rev=git071ebc3
3434

3535
# If tarball_version is not empty, it is used as the version string in
3636
# the tarball filename, regardless of all other versions listed in

opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,8 @@ static inline void pmix_atomic_wmb(void)
6363
}
6464

6565
#define PMIXMB() pmix_atomic_mb()
66+
#define PMIXRMB() pmix_atomic_rmb()
67+
#define PMIXWMB() pmix_atomic_wmb()
6668

6769
/**********************************************************************
6870
*

opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
13-
* Copyright (c) 2010 IBM Corporation. All rights reserved.
13+
* Copyright (c) 2010-2017 IBM Corporation. All rights reserved.
1414
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* Copyright (c) 2017 Intel, Inc. All rights reserved.
@@ -30,10 +30,8 @@
3030

3131
#define PMIXMB() __asm__ __volatile__ ("sync" : : : "memory")
3232
#define PMIXRMB() __asm__ __volatile__ ("lwsync" : : : "memory")
33-
#define PMIXWMB() __asm__ __volatile__ ("eieio" : : : "memory")
33+
#define PMIXWMB() __asm__ __volatile__ ("lwsync" : : : "memory")
3434
#define PMIXISYNC() __asm__ __volatile__ ("isync" : : : "memory")
35-
#define PMIXSMP_SYNC "sync \n\t"
36-
#define PMIXSMP_ISYNC "\n\tisync"
3735

3836

3937
/**********************************************************************

opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ static const char pmix_version_string[] = PMIX_VERSION;
8888
static void _notify_complete(pmix_status_t status, void *cbdata)
8989
{
9090
pmix_event_chain_t *chain = (pmix_event_chain_t*)cbdata;
91+
PMIX_ACQUIRE_OBJECT(chain);
9192
PMIX_RELEASE(chain);
9293
}
9394

@@ -178,7 +179,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr,
178179

179180
pmix_output_verbose(2, pmix_globals.debug_output,
180181
"pmix:client wait_cbfunc received");
181-
182+
PMIX_POST_OBJECT(active);
182183
*active = false;
183184
}
184185

@@ -197,6 +198,7 @@ static void job_data(struct pmix_peer_t *pr,
197198
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nspace, &cnt, PMIX_STRING))) {
198199
PMIX_ERROR_LOG(rc);
199200
cb->status = PMIX_ERROR;
201+
PMIX_POST_OBJECT(cb);
200202
cb->active = false;
201203
return;
202204
}
@@ -208,6 +210,7 @@ static void job_data(struct pmix_peer_t *pr,
208210
pmix_job_data_htable_store(pmix_globals.myid.nspace, buf);
209211
#endif
210212
cb->status = PMIX_SUCCESS;
213+
PMIX_POST_OBJECT(cb);
211214
cb->active = false;
212215
}
213216

@@ -235,6 +238,7 @@ static void evhandler_reg_callbk(pmix_status_t status,
235238
void *cbdata)
236239
{
237240
volatile int *active = (volatile int*)cbdata;
241+
PMIX_POST_OBJECT(active);
238242
*active = status;
239243
}
240244

@@ -680,6 +684,9 @@ static void _putfn(int sd, short args, void *cbdata)
680684
uint8_t *tmp;
681685
size_t len;
682686

687+
/* need to acquire the cb object from its originating thread */
688+
PMIX_ACQUIRE_OBJECT(cb);
689+
683690
/* no need to push info that starts with "pmix" as that is
684691
* info we would have been provided at startup */
685692
if (0 == strncmp(cb->key, "pmix", 4)) {
@@ -757,6 +764,8 @@ static void _putfn(int sd, short args, void *cbdata)
757764
PMIX_RELEASE(kv); // maintain accounting
758765
}
759766
cb->pstatus = rc;
767+
/* post the data so the receiving thread can acquire it */
768+
PMIX_POST_OBJECT(cb);
760769
cb->active = false;
761770
}
762771

@@ -802,6 +811,9 @@ static void _commitfn(int sd, short args, void *cbdata)
802811
pmix_buffer_t *msgout;
803812
pmix_cmd_t cmd=PMIX_COMMIT_CMD;
804813

814+
/* need to acquire the cb object from its originating thread */
815+
PMIX_ACQUIRE_OBJECT(cb);
816+
805817
msgout = PMIX_NEW(pmix_buffer_t);
806818
/* pack the cmd */
807819
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &cmd, 1, PMIX_CMD))) {
@@ -850,6 +862,8 @@ static void _commitfn(int sd, short args, void *cbdata)
850862

851863
done:
852864
cb->pstatus = rc;
865+
/* post the data so the receiving thread can acquire it */
866+
PMIX_POST_OBJECT(cb);
853867
cb->active = false;
854868
}
855869

@@ -901,6 +915,9 @@ static void _peersfn(int sd, short args, void *cbdata)
901915
#endif
902916
size_t i;
903917

918+
/* need to acquire the cb object from its originating thread */
919+
PMIX_ACQUIRE_OBJECT(cb);
920+
904921
/* cycle across our known nspaces */
905922
tmp = NULL;
906923
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
@@ -955,6 +972,8 @@ static void _peersfn(int sd, short args, void *cbdata)
955972

956973
done:
957974
cb->pstatus = rc;
975+
/* post the data so the receiving thread can acquire it */
976+
PMIX_POST_OBJECT(cb);
958977
cb->active = false;
959978
}
960979

@@ -1004,6 +1023,9 @@ static void _nodesfn(int sd, short args, void *cbdata)
10041023
pmix_nspace_t *nsptr;
10051024
pmix_nrec_t *nptr;
10061025

1026+
/* need to acquire the cb object from its originating thread */
1027+
PMIX_ACQUIRE_OBJECT(cb);
1028+
10071029
/* cycle across our known nspaces */
10081030
tmp = NULL;
10091031
PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) {
@@ -1023,6 +1045,8 @@ static void _nodesfn(int sd, short args, void *cbdata)
10231045
}
10241046

10251047
cb->pstatus = rc;
1048+
/* post the data so the receiving thread can acquire it */
1049+
PMIX_POST_OBJECT(cb);
10261050
cb->active = false;
10271051
}
10281052

opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2014-2017 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@@ -51,6 +51,8 @@
5151
#include "src/util/argv.h"
5252
#include "src/util/error.h"
5353
#include "src/util/output.h"
54+
#include "src/threads/threads.h"
55+
5456
#include "src/mca/ptl/ptl.h"
5557

5658
#include "pmix_client_ops.h"
@@ -344,5 +346,6 @@ static void op_cbfunc(pmix_status_t status, void *cbdata)
344346
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
345347

346348
cb->status = status;
349+
PMIX_POST_OBJECT(cb);
347350
cb->active = false;
348351
}

opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@
5353

5454
#include "src/class/pmix_list.h"
5555
#include "src/buffer_ops/buffer_ops.h"
56+
#include "src/threads/threads.h"
5657
#include "src/util/argv.h"
5758
#include "src/util/compress.h"
5859
#include "src/util/error.h"
@@ -186,12 +187,14 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata)
186187
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
187188
pmix_status_t rc;
188189

190+
PMIX_ACQUIRE_OBJECT(cb);
189191
cb->status = status;
190192
if (PMIX_SUCCESS == status) {
191193
if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&cb->value, kv, PMIX_VALUE))) {
192194
PMIX_ERROR_LOG(rc);
193195
}
194196
}
197+
PMIX_POST_OBJECT(cb);
195198
cb->active = false;
196199
}
197200

@@ -238,12 +241,12 @@ static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank,
238241
return msg;
239242
}
240243

241-
/* this callback is coming from the usock recv, and thus
244+
/* this callback is coming from the ptl recv, and thus
242245
* is occurring inside of our progress thread - hence, no
243246
* need to thread shift */
244247
static void _getnb_cbfunc(struct pmix_peer_t *pr,
245248
pmix_ptl_hdr_t *hdr,
246-
pmix_buffer_t *buf, void *cbdata)
249+
pmix_buffer_t *buf, void *cbdata)
247250
{
248251
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
249252
pmix_cb_t *cb2;
@@ -486,6 +489,9 @@ static void _getnbfn(int fd, short flags, void *cbdata)
486489
char *tmp;
487490
bool my_nspace = false, my_rank = false;
488491

492+
/* cb was passed to us from another thread - acquire it */
493+
PMIX_ACQUIRE_OBJECT(cb);
494+
489495
pmix_output_verbose(2, pmix_globals.debug_output,
490496
"pmix: getnbfn value for proc %s:%d key %s",
491497
cb->nspace, cb->rank,
@@ -739,11 +745,12 @@ static void _getnbfn(int fd, short flags, void *cbdata)
739745
rc = PMIX_ERROR;
740746
goto respond;
741747
}
742-
748+
/* we made a lot of changes to cb, so ensure they get
749+
* written out before we return */
750+
PMIX_POST_OBJECT(cb);
743751
return;
744752

745-
respond:
746-
753+
respond:
747754
/* if a callback was provided, execute it */
748755
if (NULL != cb->value_cbfunc) {
749756
if (NULL != val) {

opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2014-2015 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@@ -48,6 +48,7 @@
4848

4949
#include "src/class/pmix_list.h"
5050
#include "src/buffer_ops/buffer_ops.h"
51+
#include "src/threads/threads.h"
5152
#include "src/util/argv.h"
5253
#include "src/util/error.h"
5354
#include "src/util/output.h"
@@ -304,7 +305,8 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys,
304305
}
305306

306307
PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys,
307-
const pmix_info_t info[], size_t ninfo)
308+
const pmix_info_t info[],
309+
size_t ninfo)
308310
{
309311
pmix_status_t rc;
310312
pmix_cb_t *cb;
@@ -417,6 +419,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr,
417419
int ret;
418420
int32_t cnt;
419421

422+
PMIX_ACQUIRE_OBJECT(cb);
423+
420424
pmix_output_verbose(2, pmix_globals.debug_output,
421425
"pmix:client recv callback activated with %d bytes",
422426
(NULL == buf) ? -1 : (int)buf->bytes_used);
@@ -437,6 +441,7 @@ static void op_cbfunc(pmix_status_t status, void *cbdata)
437441
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
438442

439443
cb->status = status;
444+
PMIX_POST_OBJECT(cb);
440445
cb->active = false;
441446
}
442447

@@ -450,6 +455,8 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr,
450455
pmix_pdata_t *pdata;
451456
size_t ndata;
452457

458+
PMIX_ACQUIRE_OBJECT(cb);
459+
453460
pmix_output_verbose(2, pmix_globals.debug_output,
454461
"pmix:client recv callback activated with %d bytes",
455462
(NULL == buf) ? -1 : (int)buf->bytes_used);
@@ -514,6 +521,7 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda
514521
pmix_pdata_t *tgt = (pmix_pdata_t*)cb->cbdata;
515522
size_t i, j;
516523

524+
PMIX_ACQUIRE_OBJECT(cb);
517525
cb->status = status;
518526
if (PMIX_SUCCESS == status) {
519527
/* find the matching key in the provided info array - error if not found */
@@ -530,6 +538,6 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda
530538
}
531539
}
532540
}
533-
541+
PMIX_POST_OBJECT(cb);
534542
cb->active = false;
535543
}

opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2014-2017 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@@ -48,6 +48,7 @@
4848

4949
#include "src/class/pmix_list.h"
5050
#include "src/buffer_ops/buffer_ops.h"
51+
#include "src/threads/threads.h"
5152
#include "src/util/argv.h"
5253
#include "src/util/error.h"
5354
#include "src/util/output.h"
@@ -189,6 +190,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr,
189190
pmix_status_t rc, ret;
190191
int32_t cnt;
191192

193+
PMIX_ACQUIRE_OBJECT(cb);
194+
192195
pmix_output_verbose(2, pmix_globals.debug_output,
193196
"pmix:client recv callback activated with %d bytes",
194197
(NULL == buf) ? -1 : (int)buf->bytes_used);
@@ -233,9 +236,11 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata)
233236
{
234237
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
235238

239+
PMIX_ACQUIRE_OBJECT(cb);
236240
cb->status = status;
237241
if (NULL != nspace) {
238242
(void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN);
239243
}
244+
PMIX_POST_OBJECT(cb);
240245
cb->active = false;
241246
}

opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,9 @@ static void progress_local_event_hdlr(pmix_status_t status,
254254
pmix_op_cbfunc_t cbfunc, void *thiscbdata,
255255
void *notification_cbdata)
256256
{
257+
/* this may be in the host's thread, so we need to threadshift it
258+
* before accessing our internal data */
259+
257260
pmix_event_chain_t *chain = (pmix_event_chain_t*)notification_cbdata;
258261
size_t n, nsave, cnt;
259262
pmix_info_t *newinfo;
@@ -768,6 +771,9 @@ static void _notify_client_event(int sd, short args, void *cbdata)
768771
size_t n;
769772
bool matched, holdcd;
770773

774+
/* need to acquire the object from its originating thread */
775+
PMIX_ACQUIRE_OBJECT(cd);
776+
771777
pmix_output_verbose(2, pmix_globals.debug_output,
772778
"pmix_server: _notify_error notifying clients of error %s",
773779
PMIx_Error_string(cd->status));
@@ -1056,6 +1062,9 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg)
10561062
{
10571063
pmix_event_chain_t *ch = (pmix_event_chain_t*)arg;
10581064

1065+
/* need to acquire the object from its originating thread */
1066+
PMIX_ACQUIRE_OBJECT(ch);
1067+
10591068
ch->timer_active = false;
10601069

10611070
/* remove it from the list */

opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -344,6 +344,9 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
344344
pmix_notify_caddy_t *ncd;
345345
pmix_event_chain_t *chain;
346346

347+
/* need to acquire the object from its originating thread */
348+
PMIX_ACQUIRE_OBJECT(cd);
349+
347350
pmix_output_verbose(2, pmix_globals.debug_output,
348351
"pmix: register event_hdlr with %d infos", (int)cd->ninfo);
349352

@@ -775,6 +778,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata)
775778
size_t n;
776779
pmix_active_code_t *active;
777780

781+
/* need to acquire the object from its originating thread */
782+
PMIX_ACQUIRE_OBJECT(cd);
783+
778784
/* if I am not the server, then I need to notify the server
779785
* to remove my registration */
780786
if (PMIX_PROC_SERVER != pmix_globals.proc_type) {

0 commit comments

Comments
 (0)