Skip to content

Commit ccf55a0

Browse files
committed
Merge pull request open-mpi#1035 from rhc54/cmr2.0/usock
Restore the usock oob component
2 parents 57fe7e5 + 6e802cf commit ccf55a0

21 files changed

+3665
-65
lines changed

ompi/mca/rte/orte/rte_orte_module.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ void ompi_rte_wait_for_debugger(void)
113113
debugger = 1;
114114
}
115115

116-
if (!debugger) {
116+
if (!debugger && NULL == getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
117117
/* if not, just return */
118118
return;
119119
}

orte/mca/ess/base/ess_base_std_app.c

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,13 @@
4747
#include "opal/runtime/opal.h"
4848
#include "opal/runtime/opal_progress_threads.h"
4949

50+
#include "orte/mca/rml/base/base.h"
51+
#include "orte/mca/routed/base/base.h"
5052
#include "orte/mca/errmgr/errmgr.h"
5153
#include "orte/mca/dfs/base/base.h"
5254
#include "orte/mca/grpcomm/base/base.h"
55+
#include "orte/mca/oob/base/base.h"
56+
#include "orte/mca/rml/rml.h"
5357
#include "orte/mca/odls/odls_types.h"
5458
#include "orte/mca/filem/base/base.h"
5559
#include "orte/mca/errmgr/base/base.h"
@@ -173,14 +177,73 @@ int orte_ess_base_app_setup(bool db_restrict_local)
173177
}
174178
OBJ_DESTRUCT(&kv);
175179
}
176-
180+
/* Setup the communication infrastructure */
181+
/*
182+
* OOB Layer
183+
*/
184+
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
185+
ORTE_ERROR_LOG(ret);
186+
error = "orte_oob_base_open";
187+
goto error;
188+
}
189+
if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
190+
ORTE_ERROR_LOG(ret);
191+
error = "orte_oob_base_select";
192+
goto error;
193+
}
194+
/* Runtime Messaging Layer */
195+
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
196+
ORTE_ERROR_LOG(ret);
197+
error = "orte_rml_base_open";
198+
goto error;
199+
}
200+
if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
201+
ORTE_ERROR_LOG(ret);
202+
error = "orte_rml_base_select";
203+
goto error;
204+
}
177205
/* setup the errmgr */
178206
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
179207
ORTE_ERROR_LOG(ret);
180208
error = "orte_errmgr_base_select";
181209
goto error;
182210
}
183-
211+
/* Routed system */
212+
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
213+
ORTE_ERROR_LOG(ret);
214+
error = "orte_routed_base_open";
215+
goto error;
216+
}
217+
if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
218+
ORTE_ERROR_LOG(ret);
219+
error = "orte_routed_base_select";
220+
goto error;
221+
}
222+
/*
223+
* Group communications
224+
*/
225+
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
226+
ORTE_ERROR_LOG(ret);
227+
error = "orte_grpcomm_base_open";
228+
goto error;
229+
}
230+
if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
231+
ORTE_ERROR_LOG(ret);
232+
error = "orte_grpcomm_base_select";
233+
goto error;
234+
}
235+
/* enable communication via the rml */
236+
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
237+
ORTE_ERROR_LOG(ret);
238+
error = "orte_rml.enable_comm";
239+
goto error;
240+
}
241+
/* setup the routed info */
242+
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
243+
ORTE_ERROR_LOG(ret);
244+
error = "orte_routed.init_routes";
245+
goto error;
246+
}
184247
/* open the distributed file system */
185248
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
186249
ORTE_ERROR_LOG(ret);
@@ -216,7 +279,13 @@ int orte_ess_base_app_finalize(void)
216279
(void) mca_base_framework_close(&orte_filem_base_framework);
217280
(void) mca_base_framework_close(&orte_errmgr_base_framework);
218281

282+
/* now can close the rml and its friendly group comm */
283+
(void) mca_base_framework_close(&orte_grpcomm_base_framework);
219284
(void) mca_base_framework_close(&orte_dfs_base_framework);
285+
(void) mca_base_framework_close(&orte_routed_base_framework);
286+
287+
(void) mca_base_framework_close(&orte_rml_base_framework);
288+
(void) mca_base_framework_close(&orte_oob_base_framework);
220289
(void) mca_base_framework_close(&orte_state_base_framework);
221290

222291
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
@@ -270,7 +339,7 @@ void orte_ess_base_app_abort(int status, bool report)
270339
* the message if routing is enabled as this indicates we
271340
* have someone to send to
272341
*/
273-
if (report && orte_create_session_dirs) {
342+
if (report && orte_routing_is_enabled && orte_create_session_dirs) {
274343
myfile = opal_os_path(false, orte_process_info.proc_session_dir, "aborted", NULL);
275344
fd = open(myfile, O_CREAT, S_IRUSR);
276345
close(fd);

orte/mca/ess/pmi/ess_pmi_module.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
#include "opal/mca/pmix/base/base.h"
4848

4949
#include "orte/mca/errmgr/errmgr.h"
50+
#include "orte/mca/grpcomm/grpcomm.h"
51+
#include "orte/mca/rml/rml.h"
5052
#include "orte/util/proc_info.h"
5153
#include "orte/util/show_help.h"
5254
#include "orte/util/name_fns.h"
@@ -83,6 +85,7 @@ static int rte_init(void)
8385
char *envar, *ev1, *ev2;
8486
uint64_t unique_key[2];
8587
char *string_key;
88+
char *rmluri;
8689
opal_value_t *kv;
8790
char *val;
8891
int u32, *u32ptr;
@@ -358,6 +361,16 @@ static int rte_init(void)
358361

359362
/*** PUSH DATA FOR OTHERS TO FIND ***/
360363

364+
/* push our RML URI in case others need to talk directly to us */
365+
rmluri = orte_rml.get_contact_info();
366+
/* push it out for others to use */
367+
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
368+
if (ORTE_SUCCESS != ret) {
369+
error = "pmix put uri";
370+
goto error;
371+
}
372+
free(rmluri);
373+
361374
/* push our hostname so others can find us, if they need to */
362375
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
363376
if (ORTE_SUCCESS != ret) {

orte/mca/oob/usock/Makefile.am

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#
2+
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3+
# University Research and Technology
4+
# Corporation. All rights reserved.
5+
# Copyright (c) 2004-2005 The University of Tennessee and The University
6+
# of Tennessee Research Foundation. All rights
7+
# reserved.
8+
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
9+
# University of Stuttgart. All rights reserved.
10+
# Copyright (c) 2004-2005 The Regents of the University of California.
11+
# All rights reserved.
12+
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
13+
# Copyright (c) 2012-2013 Los Alamos National Security, LLC.
14+
# All rights reserved
15+
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
16+
# $COPYRIGHT$
17+
#
18+
# Additional copyrights may follow
19+
#
20+
# $HEADER$
21+
#
22+
23+
# Headers and C sources that make up the usock OOB component. The same
# list is used for both the DSO and the static library targets below.
sources = \
24+
oob_usock_component.h \
25+
oob_usock.h \
26+
oob_usock_component.c \
27+
oob_usock_connection.h \
28+
oob_usock_sendrecv.h \
29+
oob_usock_hdr.h \
30+
oob_usock_peer.h \
31+
oob_usock_ping.h \
32+
oob_usock.c \
33+
oob_usock_connection.c \
34+
oob_usock_sendrecv.c
35+
36+
# Make the output library in this directory, and name it either
37+
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
38+
# (for static builds).
39+
40+
# Exactly one of component_install / component_noinst is non-empty,
# selected by whether this component is being built as a DSO.
if MCA_BUILD_orte_oob_usock_DSO
41+
component_noinst =
42+
component_install = mca_oob_usock.la
43+
else
44+
component_noinst = libmca_oob_usock.la
45+
component_install =
46+
endif
47+
48+
# DSO case: the module is listed for installation under $(ortelibdir).
mcacomponentdir = $(ortelibdir)
49+
mcacomponent_LTLIBRARIES = $(component_install)
50+
mca_oob_usock_la_SOURCES = $(sources)
51+
mca_oob_usock_la_LDFLAGS = -module -avoid-version
52+
53+
# Static case: the library is listed under noinst_, so it is built but
# not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
54+
libmca_oob_usock_la_SOURCES = $(sources)
55+
libmca_oob_usock_la_LDFLAGS = -module -avoid-version
56+

orte/mca/oob/usock/configure.m4

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# -*- shell-script -*-
2+
#
3+
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4+
# University Research and Technology
5+
# Corporation. All rights reserved.
6+
# Copyright (c) 2004-2005 The University of Tennessee and The University
7+
# of Tennessee Research Foundation. All rights
8+
# reserved.
9+
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10+
# University of Stuttgart. All rights reserved.
11+
# Copyright (c) 2004-2005 The Regents of the University of California.
12+
# All rights reserved.
13+
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
14+
# All rights reserved.
15+
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
16+
# Copyright (c) 2013 Intel, Inc. All rights reserved.
17+
# $COPYRIGHT$
18+
#
19+
# Additional copyrights may follow
20+
#
21+
# $HEADER$
22+
#
23+
24+
# MCA_orte_oob_usock_CONFIG([action-if-found], [action-if-not-found])
25+
# -----------------------------------------------------------
26+
AC_DEFUN([MCA_orte_oob_usock_CONFIG],[
27+
# have configure generate this component's Makefile
AC_CONFIG_FILES([orte/mca/oob/usock/Makefile])
28+
29+
# check for sockaddr_un (a good sign we have Unix domain sockets);
# the result is recorded in oob_usock_happy as "yes" or "no"
30+
AC_CHECK_TYPES([struct sockaddr_un],
31+
[oob_usock_happy="yes"],
32+
[oob_usock_happy="no"],
33+
[AC_INCLUDES_DEFAULT
34+
#ifdef HAVE_SYS_SOCKET_H
35+
#include <sys/socket.h>
36+
#endif
37+
#ifdef HAVE_SYS_UN_H
38+
#include <sys/un.h>
39+
#endif])
40+
41+
# run the caller's first argument (action-if-found) when the type was
# found, otherwise the second argument (action-if-not-found)
AS_IF([test "$oob_usock_happy" = "yes"], [$1], [$2])
42+
])dnl
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# -*- text -*-
2+
#
3+
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
4+
# University Research and Technology
5+
# Corporation. All rights reserved.
6+
# Copyright (c) 2004-2006 The University of Tennessee and The University
7+
# of Tennessee Research Foundation. All rights
8+
# reserved.
9+
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10+
# University of Stuttgart. All rights reserved.
11+
# Copyright (c) 2004-2005 The Regents of the University of California.
12+
# All rights reserved.
13+
# $COPYRIGHT$
14+
#
15+
# Additional copyrights may follow
16+
#
17+
# $HEADER$
18+
#
19+
[static-and-dynamic]
20+
Both static and dynamic port ranges were specified for the
21+
out-of-band (OOB) communication subsystem:
22+
23+
Static ports: %s
24+
Dynamic ports: %s
25+
26+
Only one can be specified. Please choose either static or
27+
dynamic ports and try again.
28+
#
29+
[include-exclude]
30+
Both TCP interface include and exclude lists were specified:
31+
32+
Include: %s
33+
Exclude: %s
34+
35+
Only one of these can be given.
36+
#
37+
[not-parseable]
38+
The specified network is not parseable. Since we cannot determine
39+
your desired intent, we cannot establish a TCP socket for out-of-band
40+
communications and will therefore abort. Please correct the network
41+
specification and retry.
42+
#
43+
[no-included-found]
44+
None of the networks specified to be included for out-of-band communications
45+
could be found:
46+
47+
Value given: %s
48+
49+
Please revise the specification and try again.
50+
#
51+
[excluded-all]
52+
The specified list of networks to be excluded for out-of-band communications
53+
resulted in no networks being available:
54+
55+
Value given: %s
56+
57+
Please revise the specification and try again.
58+
#
59+
[no-interfaces-avail]
60+
No network interfaces were found for out-of-band communications. We require
61+
at least one available network for TCP-based messaging.
62+
#
63+
[invalid if_inexclude]
64+
WARNING: An invalid value was given for oob_tcp_if_%s. This
65+
value will be ignored.
66+
67+
Local host: %s
68+
Value: %s
69+
Message: %s
70+
#

0 commit comments

Comments
 (0)