Skip to content

Commit 702a982

Browse files
author
rhc54
authored
Merge pull request #1767 from rhc54/topic/pmix2
Enable the PMIx event notification capability
2 parents e135543 + 5d330d5 commit 702a982

File tree

264 files changed

+11825
-5905
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

264 files changed

+11825
-5905
lines changed

ompi/errhandler/errhandler.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
1515
* Copyright (c) 2015 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
17-
* Copyright (c) 2015 Intel, Inc. All rights reserved.
17+
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
1818
* $COPYRIGHT$
1919
*
2020
* Additional copyrights may follow
@@ -42,7 +42,7 @@ opal_pointer_array_t ompi_errhandler_f_to_c_table = {{0}};
4242
/*
4343
* default errhandler id
4444
*/
45-
static int default_errhandler_id = -1;
45+
static size_t default_errhandler_id = SIZE_MAX;
4646

4747
/*
4848
* Class information
@@ -163,7 +163,7 @@ int ompi_errhandler_finalize(void)
163163

164164
/* JMS Add stuff here checking for unreleased errorhandlers,
165165
similar to communicators, info handles, etc. */
166-
opal_pmix.deregister_errhandler(default_errhandler_id, NULL, NULL);
166+
opal_pmix.deregister_evhandler(default_errhandler_id, NULL, NULL);
167167

168168
/* Remove errhandler F2C table */
169169

@@ -222,7 +222,7 @@ ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type,
222222

223223
/* registration callback */
224224
void ompi_errhandler_registration_callback(int status,
225-
int errhandler_ref,
225+
size_t errhandler_ref,
226226
void *cbdata)
227227
{
228228
ompi_errhandler_errtrk_t *errtrk = (ompi_errhandler_errtrk_t*)cbdata;
@@ -236,14 +236,15 @@ void ompi_errhandler_registration_callback(int status,
236236
* Default errhandler callback
237237
*/
238238
void ompi_errhandler_callback(int status,
239-
opal_list_t *procs,
240-
opal_list_t *info,
241-
opal_pmix_release_cbfunc_t cbfunc,
239+
const opal_process_name_t *source,
240+
opal_list_t *info, opal_list_t *results,
241+
opal_pmix_notification_complete_fn_t cbfunc,
242242
void *cbdata)
243243
{
244-
/* allow the caller to release its data */
244+
/* tell the event chain engine to go no further - we
245+
* will handle this */
245246
if (NULL != cbfunc) {
246-
cbfunc(cbdata);
247+
cbfunc(OMPI_ERR_HANDLERS_COMPLETE, NULL, NULL, NULL, cbdata);
247248
}
248249
/* our default action is to abort */
249250
ompi_mpi_abort(MPI_COMM_WORLD, status);

ompi/errhandler/errhandler.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
15-
* Copyright (c) 2015 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -380,13 +380,13 @@ typedef struct {
380380
} ompi_errhandler_errtrk_t;
381381

382382
OMPI_DECLSPEC void ompi_errhandler_callback(int status,
383-
opal_list_t *procs,
384-
opal_list_t *info,
385-
opal_pmix_release_cbfunc_t cbfunc,
383+
const opal_process_name_t *source,
384+
opal_list_t *info, opal_list_t *results,
385+
opal_pmix_notification_complete_fn_t cbfunc,
386386
void *cbdata);
387387

388388
OMPI_DECLSPEC void ompi_errhandler_registration_callback(int status,
389-
int errhandler_ref,
389+
size_t errhandler_ref,
390390
void *cbdata);
391391
/**
392392
* Check to see if an errhandler is intrinsic.

ompi/include/ompi/constants.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12+
* Copyright (c) 2016 Intel, Inc. All rights reserved.
1213
* $COPYRIGHT$
1314
*
1415
* Additional copyrights may follow
@@ -63,6 +64,7 @@ enum {
6364

6465
OMPI_ERR_BUFFER = OPAL_ERR_BUFFER,
6566
OMPI_ERR_SILENT = OPAL_ERR_SILENT,
67+
OMPI_ERR_HANDLERS_COMPLETE = OPAL_ERR_HANDLERS_COMPLETE,
6668

6769
OMPI_ERR_REQUEST = OMPI_ERR_BASE - 1,
6870
OMPI_ERR_RMA_SYNC = OMPI_ERR_BASE - 2,

ompi/mca/rte/orte/rte_orte_module.c

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,34 @@ void ompi_rte_abort(int error_code, char *fmt, ...)
9595
exit(-1);
9696
}
9797

98+
/* Reference id delivered to _register_fn for the debugger-release event
 * handler; stays SIZE_MAX until that callback has stored a value. It is
 * later passed to opal_pmix.deregister_evhandler(). */
static size_t handler = SIZE_MAX;

/* Completion flags polled by OMPI_WAIT_FOR_COMPLETION in
 * ompi_rte_wait_for_debugger() and cleared from the registration and
 * event callbacks. They are volatile so that every poll re-reads memory,
 * matching the volatile completion flag declared in ompi_mpi_init.
 * NOTE(review): the callbacks may be invoked from a thread other than the
 * one that is polling - confirm against the PMIx component in use. */
static volatile bool debugger_register_active = true;
static volatile bool debugger_event_active = true;
101+
102+
static void _release_fn(int status,
103+
const opal_process_name_t *source,
104+
opal_list_t *info, opal_list_t *results,
105+
opal_pmix_notification_complete_fn_t cbfunc,
106+
void *cbdata)
107+
{
108+
/* must let the notifier know we are done */
109+
if (NULL != cbfunc) {
110+
cbfunc(ORTE_SUCCESS, NULL, NULL, NULL, cbdata);
111+
}
112+
debugger_event_active = false;
113+
}
114+
115+
static void _register_fn(int status,
116+
size_t evhandler_ref,
117+
void *cbdata)
118+
{
119+
opal_list_t *codes = (opal_list_t*)cbdata;
120+
121+
handler = evhandler_ref;
122+
OPAL_LIST_RELEASE(codes);
123+
debugger_register_active = false;
124+
}
125+
98126
/*
99127
* Wait for a debugger if asked. We support two ways of waiting for
100128
* attaching debuggers -- see big comment in
@@ -103,7 +131,8 @@ void ompi_rte_abort(int error_code, char *fmt, ...)
103131
void ompi_rte_wait_for_debugger(void)
104132
{
105133
int debugger;
106-
orte_rml_recv_cb_t xfer;
134+
opal_list_t *codes;
135+
opal_value_t *kv;
107136

108137
/* See lengthy comment in orte/tools/orterun/debuggers.c about
109138
orte_in_parallel_debugger */
@@ -133,23 +162,23 @@ void ompi_rte_wait_for_debugger(void)
133162
#endif
134163
}
135164
} else {
136-
/* only the rank=0 proc waits for either a message from the
137-
* HNP or for the debugger to attach - everyone else will just
138-
* spin in * the grpcomm barrier in ompi_mpi_init until rank=0
139-
* joins them.
140-
*/
141-
if (0 != ORTE_PROC_MY_NAME->vpid) {
142-
return;
143-
}
144165

145-
/* VPID 0 waits for a message from the HNP */
146-
OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t);
147-
xfer.active = true;
148-
orte_rml.recv_buffer_nb(OMPI_NAME_WILDCARD,
149-
ORTE_RML_TAG_DEBUGGER_RELEASE,
150-
ORTE_RML_NON_PERSISTENT,
151-
orte_rml_recv_callback, &xfer);
152-
/* let the MPI progress engine run while we wait */
153-
OMPI_WAIT_FOR_COMPLETION(xfer.active);
166+
/* register an event handler for the ORTE_ERR_DEBUGGER_RELEASE event */
167+
codes = OBJ_NEW(opal_list_t);
168+
kv = OBJ_NEW(opal_value_t);
169+
kv->key = strdup("errorcode");
170+
kv->type = OPAL_INT;
171+
kv->data.integer = ORTE_ERR_DEBUGGER_RELEASE;
172+
opal_list_append(codes, &kv->super);
173+
174+
opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, codes);
175+
/* let the MPI progress engine run while we wait for registration to complete */
176+
OMPI_WAIT_FOR_COMPLETION(debugger_register_active);
177+
178+
/* let the MPI progress engine run while we wait for debugger release */
179+
OMPI_WAIT_FOR_COMPLETION(debugger_event_active);
180+
181+
/* deregister the event handler */
182+
opal_pmix.deregister_evhandler(handler, NULL, NULL);
154183
}
155184
}

ompi/mpi/c/lookup_name.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
* All rights reserved.
1313
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2015 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18-
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
18+
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
1919
* $COPYRIGHT$
2020
*
2121
* Additional copyrights may follow
@@ -94,13 +94,13 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
9494
rng = OBJ_NEW(opal_value_t);
9595
rng->key = strdup(OPAL_PMIX_RANGE);
9696
rng->type = OPAL_INT;
97-
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
97+
rng->data.integer = OPAL_PMIX_RANGE_NAMESPACE; // share only with procs in same nspace
9898
opal_list_append(&pinfo, &rng->super);
9999
} else if (0 == strcmp(range, "session")) {
100100
rng = OBJ_NEW(opal_value_t);
101101
rng->key = strdup(OPAL_PMIX_RANGE);
102102
rng->type = OPAL_INT;
103-
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
103+
rng->data.integer = OPAL_PMIX_RANGE_SESSION; // share only with procs in same session
104104
opal_list_append(&pinfo, &rng->super);
105105
} else {
106106
/* unrecognized scope */

ompi/mpi/c/publish_name.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
* All rights reserved.
1313
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2015 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18-
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
18+
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
1919
* $COPYRIGHT$
2020
*
2121
* Additional copyrights may follow
@@ -94,13 +94,13 @@ int MPI_Publish_name(const char *service_name, MPI_Info info,
9494
rng = OBJ_NEW(opal_value_t);
9595
rng->key = strdup(OPAL_PMIX_RANGE);
9696
rng->type = OPAL_INT;
97-
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
97+
rng->data.integer = OPAL_PMIX_RANGE_NAMESPACE; // share only with procs in same nspace
9898
opal_list_append(&values, &rng->super);
9999
} else if (0 == strcmp(range, "session")) {
100100
rng = OBJ_NEW(opal_value_t);
101101
rng->key = strdup(OPAL_PMIX_RANGE);
102102
rng->type = OPAL_INT;
103-
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
103+
rng->data.integer = OPAL_PMIX_RANGE_SESSION; // share only with procs in same session
104104
opal_list_append(&values, &rng->super);
105105
} else {
106106
/* unrecognized scope */

ompi/mpi/c/unpublish_name.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
* All rights reserved.
1313
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2015 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18-
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
18+
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
1919
* $COPYRIGHT$
2020
*
2121
* Additional copyrights may follow
@@ -96,13 +96,13 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info,
9696
rng = OBJ_NEW(opal_value_t);
9797
rng->key = strdup(OPAL_PMIX_RANGE);
9898
rng->type = OPAL_INT;
99-
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
99+
rng->data.integer = OPAL_PMIX_RANGE_NAMESPACE; // share only with procs in same nspace
100100
opal_list_append(&pinfo, &rng->super);
101101
} else if (0 == strcmp(range, "session")) {
102102
rng = OBJ_NEW(opal_value_t);
103103
rng->key = strdup(OPAL_PMIX_RANGE);
104104
rng->type = OPAL_INT;
105-
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
105+
rng->data.integer = OPAL_PMIX_RANGE_SESSION; // share only with procs in same session
106106
opal_list_append(&pinfo, &rng->super);
107107
} else {
108108
/* unrecognized scope */

ompi/runtime/ompi_mpi_init.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
380380
char *cmd=NULL, *av=NULL;
381381
ompi_errhandler_errtrk_t errtrk;
382382
volatile bool active;
383+
opal_list_t info;
384+
opal_value_t *kv;
383385
OPAL_TIMING_DECLARE(tm);
384386
OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY);
385387

@@ -522,10 +524,16 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
522524
/* Register the default errhandler callback */
523525
errtrk.status = OPAL_ERROR;
524526
errtrk.active = true;
525-
opal_pmix.register_errhandler(NULL, ompi_errhandler_callback,
526-
ompi_errhandler_registration_callback,
527-
(void*)&errtrk);
527+
/* we want to go first */
528+
OBJ_CONSTRUCT(&info, opal_list_t);
529+
kv = OBJ_NEW(opal_value_t);
530+
kv->key = strdup(OPAL_PMIX_EVENT_ORDER_PREPEND);
531+
opal_list_append(&info, &kv->super);
532+
opal_pmix.register_evhandler(NULL, &info, ompi_errhandler_callback,
533+
ompi_errhandler_registration_callback,
534+
(void*)&errtrk);
528535
OMPI_WAIT_FOR_COMPLETION(errtrk.active);
536+
OPAL_LIST_DESTRUCT(&info);
529537
if (OPAL_SUCCESS != errtrk.status) {
530538
error = "Error handler registration";
531539
ret = errtrk.status;

opal/Makefile.am

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# Copyright (c) 2004-2005 The Regents of the University of California.
1111
# All rights reserved.
1212
# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
13-
# Copyright (c) 2015 Intel, Inc. All rights reserved.
13+
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
1414
# $COPYRIGHT$
1515
#
1616
# Additional copyrights may follow
@@ -76,7 +76,6 @@ nobase_opal_HEADERS = $(headers)
7676
endif
7777

7878
include class/Makefile.am
79-
include errhandler/Makefile.am
8079
include memoryhooks/Makefile.am
8180
include runtime/Makefile.am
8281
include threads/Makefile.am

opal/errhandler/Makefile.am

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments
 (0)