Skip to content

Commit 649301a

Browse files
author
Ralph Castain
committed
Revise the routed framework to be multi-select so it can support the new conduit system. Update all calls to rml.send* to the new syntax. Define an orte_mgmt_conduit for admin and IOF messages, and an orte_coll_conduit for all collective operations (e.g., xcast, modex, and barrier).
This work is not yet complete: we still need a better way of tracking which routed module is being used down in the OOB. For example, when a peer drops its connection, we want to remove that route from all conduits that (a) use the OOB and (b) are routed, but we don't want to remove it from an OFI conduit.
1 parent 7d7cf10 commit 649301a

File tree

89 files changed

+1374
-1948
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+1374
-1948
lines changed

contrib/platform/intel/bend/mac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ enable_shared=yes
1313
enable_static=no
1414
enable_io_romio=no
1515
enable_ipv6=no
16-
enable_mpi_fortran=yes
16+
enable_mpi_fortran=no
1717
enable_mpi_cxx=no
1818
enable_mpi_cxx_seek=no
1919
enable_memchecker=no

opal/mca/base/mca_base_var.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1890,13 +1890,13 @@ static char *source_name(mca_base_var_t *var)
18901890

18911891
static int var_value_string (mca_base_var_t *var, char **value_string)
18921892
{
1893-
const mca_base_var_storage_t *value;
1893+
const mca_base_var_storage_t *value=NULL;
18941894
int ret;
18951895

18961896
assert (MCA_BASE_VAR_TYPE_MAX > var->mbv_type);
18971897

18981898
ret = mca_base_var_get_value(var->mbv_index, &value, NULL, NULL);
1899-
if (OPAL_SUCCESS !=ret) {
1899+
if (OPAL_SUCCESS != ret || NULL == value) {
19001900
return ret;
19011901
}
19021902

opal/mca/pmix/ext3x/pmix3x.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ opal_pmix_data_range_t pmix3x_convert_range(pmix_data_range_t range) {
687687
case PMIX_RANGE_CUSTOM:
688688
return OPAL_PMIX_RANGE_CUSTOM;
689689
default:
690-
return OPAL_PMIX_SCOPE_UNDEF;
690+
return OPAL_PMIX_RANGE_UNDEF;
691691
}
692692
}
693693

opal/mca/pmix/pmix3x/pmix3x.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ opal_pmix_data_range_t pmix3x_convert_range(pmix_data_range_t range) {
687687
case PMIX_RANGE_CUSTOM:
688688
return OPAL_PMIX_RANGE_CUSTOM;
689689
default:
690-
return OPAL_PMIX_SCOPE_UNDEF;
690+
return OPAL_PMIX_RANGE_UNDEF;
691691
}
692692
}
693693

opal/runtime/opal_info_support.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* Copyright (c) 2011-2012 University of Houston. All rights reserved.
17+
* Copyright (c) 2016 Intel, Inc. All rights reserved.
1718
* $COPYRIGHT$
1819
*
1920
* Additional copyrights may follow
@@ -626,12 +627,12 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m
626627
/* read the selection parameter */
627628
var_id = mca_base_var_find (group->group_project, group->group_framework, NULL, NULL);
628629
if (0 <= var_id) {
629-
const mca_base_var_storage_t *value;
630+
const mca_base_var_storage_t *value=NULL;
630631
char **requested_components;
631632
bool include_mode;
632633

633634
mca_base_var_get_value (var_id, &value, NULL, NULL);
634-
if (NULL != value->stringval && '\0' != value->stringval[0]) {
635+
if (NULL != value && NULL != value->stringval && '\0' != value->stringval[0]) {
635636
mca_base_component_parse_requested (value->stringval, &include_mode, &requested_components);
636637

637638
for (i = 0, requested = !include_mode ; requested_components[i] ; ++i) {

orte/mca/dfs/app/dfs_app.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
33
* All rights reserved.
4-
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
4+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
55
* Copyright (c) 2014 Research Organization for Information Science
66
* and Technology (RIST). All rights reserved.
77
* $COPYRIGHT$
@@ -615,7 +615,8 @@ static void process_opens(int fd, short args, void *cbdata)
615615
ORTE_NAME_PRINT(&daemon),
616616
filename);
617617
/* send it */
618-
if (0 > (rc = orte_rml.send_buffer_nb(&daemon, buffer,
618+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
619+
&daemon, buffer,
619620
ORTE_RML_TAG_DFS_CMD,
620621
orte_rml_send_callback, NULL))) {
621622
ORTE_ERROR_LOG(rc);
@@ -721,7 +722,8 @@ static void process_close(int fd, short args, void *cbdata)
721722
ORTE_NAME_PRINT(&trk->host_daemon),
722723
trk->local_fd);
723724
/* send it */
724-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
725+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
726+
&trk->host_daemon, buffer,
725727
ORTE_RML_TAG_DFS_CMD,
726728
orte_rml_send_callback, NULL))) {
727729
ORTE_ERROR_LOG(rc);
@@ -843,7 +845,8 @@ static void process_sizes(int fd, short args, void *cbdata)
843845
ORTE_NAME_PRINT(&trk->host_daemon),
844846
trk->local_fd);
845847
/* send it */
846-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
848+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
849+
&trk->host_daemon, buffer,
847850
ORTE_RML_TAG_DFS_CMD,
848851
orte_rml_send_callback, NULL))) {
849852
ORTE_ERROR_LOG(rc);
@@ -997,7 +1000,8 @@ static void process_seeks(int fd, short args, void *cbdata)
9971000
ORTE_NAME_PRINT(&trk->host_daemon),
9981001
trk->local_fd);
9991002
/* send it */
1000-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
1003+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1004+
&trk->host_daemon, buffer,
10011005
ORTE_RML_TAG_DFS_CMD,
10021006
orte_rml_send_callback, NULL))) {
10031007
ORTE_ERROR_LOG(rc);
@@ -1109,7 +1113,8 @@ static void process_reads(int fd, short args, void *cbdata)
11091113
ORTE_NAME_PRINT(&trk->host_daemon),
11101114
trk->local_fd);
11111115
/* send it */
1112-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
1116+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1117+
&trk->host_daemon, buffer,
11131118
ORTE_RML_TAG_DFS_CMD,
11141119
orte_rml_send_callback, NULL))) {
11151120
ORTE_ERROR_LOG(rc);
@@ -1176,7 +1181,8 @@ static void process_posts(int fd, short args, void *cbdata)
11761181
goto error;
11771182
}
11781183
/* send it */
1179-
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buffer,
1184+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1185+
ORTE_PROC_MY_DAEMON, buffer,
11801186
ORTE_RML_TAG_DFS_CMD,
11811187
orte_rml_send_callback, NULL))) {
11821188
ORTE_ERROR_LOG(rc);
@@ -1237,7 +1243,8 @@ static void process_getfm(int fd, short args, void *cbdata)
12371243
goto error;
12381244
}
12391245
/* send it */
1240-
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buffer,
1246+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1247+
ORTE_PROC_MY_DAEMON, buffer,
12411248
ORTE_RML_TAG_DFS_CMD,
12421249
orte_rml_send_callback, NULL))) {
12431250
ORTE_ERROR_LOG(rc);

orte/mca/dfs/orted/dfs_orted.c

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
33
* All rights reserved.
44
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
5-
* Copyright (c) 2015 Intel, Inc. All rights reserved
5+
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
66
* Copyright (c) 2015 Research Organization for Information Science
77
* and Technology (RIST). All rights reserved.
88
* $COPYRIGHT$
@@ -414,7 +414,8 @@ static void process_opens(int fd, short args, void *cbdata)
414414
free(filename);
415415
filename = NULL;
416416
/* send it */
417-
if (0 > (rc = orte_rml.send_buffer_nb(&node->daemon->name, buffer,
417+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
418+
&node->daemon->name, buffer,
418419
ORTE_RML_TAG_DFS_CMD,
419420
orte_rml_send_callback, NULL))) {
420421
ORTE_ERROR_LOG(rc);
@@ -525,7 +526,8 @@ static void process_close(int fd, short args, void *cbdata)
525526
ORTE_NAME_PRINT(&trk->host_daemon),
526527
trk->local_fd);
527528
/* send it */
528-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
529+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
530+
&trk->host_daemon, buffer,
529531
ORTE_RML_TAG_DFS_CMD,
530532
orte_rml_send_callback, NULL))) {
531533
ORTE_ERROR_LOG(rc);
@@ -630,7 +632,8 @@ static void process_sizes(int fd, short args, void *cbdata)
630632
ORTE_NAME_PRINT(&trk->host_daemon),
631633
trk->local_fd);
632634
/* send it */
633-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
635+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
636+
&trk->host_daemon, buffer,
634637
ORTE_RML_TAG_DFS_CMD,
635638
orte_rml_send_callback, NULL))) {
636639
ORTE_ERROR_LOG(rc);
@@ -778,7 +781,8 @@ static void process_seeks(int fd, short args, void *cbdata)
778781
ORTE_NAME_PRINT(&trk->host_daemon),
779782
trk->local_fd);
780783
/* send it */
781-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
784+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
785+
&trk->host_daemon, buffer,
782786
ORTE_RML_TAG_DFS_CMD,
783787
orte_rml_send_callback, NULL))) {
784788
ORTE_ERROR_LOG(rc);
@@ -888,7 +892,8 @@ static void process_reads(int fd, short args, void *cbdata)
888892
ORTE_NAME_PRINT(&trk->host_daemon),
889893
trk->local_fd);
890894
/* send it */
891-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
895+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
896+
&trk->host_daemon, buffer,
892897
ORTE_RML_TAG_DFS_CMD,
893898
orte_rml_send_callback, NULL))) {
894899
ORTE_ERROR_LOG(rc);
@@ -1385,7 +1390,8 @@ static void recv_dfs_cmd(int status, orte_process_name_t* sender,
13851390
return;
13861391
}
13871392
/* send it */
1388-
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
1393+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1394+
sender, answer,
13891395
ORTE_RML_TAG_DFS_DATA,
13901396
orte_rml_send_callback, NULL))) {
13911397
ORTE_ERROR_LOG(rc);
@@ -1473,7 +1479,8 @@ static void recv_dfs_cmd(int status, orte_process_name_t* sender,
14731479
return;
14741480
}
14751481
/* send it */
1476-
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
1482+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1483+
sender, answer,
14771484
ORTE_RML_TAG_DFS_DATA,
14781485
orte_rml_send_callback, NULL))) {
14791486
ORTE_ERROR_LOG(rc);
@@ -1582,7 +1589,8 @@ static void recv_dfs_cmd(int status, orte_process_name_t* sender,
15821589
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
15831590
(long)bytes_read,
15841591
ORTE_NAME_PRINT(sender));
1585-
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
1592+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1593+
sender, answer,
15861594
ORTE_RML_TAG_DFS_DATA,
15871595
orte_rml_send_callback, NULL))) {
15881596
ORTE_ERROR_LOG(rc);
@@ -1696,7 +1704,8 @@ static void recv_dfs_cmd(int status, orte_process_name_t* sender,
16961704
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
16971705
(long)bytes_read,
16981706
ORTE_NAME_PRINT(sender));
1699-
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
1707+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1708+
sender, answer,
17001709
ORTE_RML_TAG_DFS_DATA,
17011710
orte_rml_send_callback, NULL))) {
17021711
ORTE_ERROR_LOG(rc);
@@ -1748,7 +1757,8 @@ static void recv_dfs_cmd(int status, orte_process_name_t* sender,
17481757
ORTE_ERROR_LOG(rc);
17491758
return;
17501759
}
1751-
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
1760+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1761+
sender, answer,
17521762
ORTE_RML_TAG_DFS_DATA,
17531763
orte_rml_send_callback, NULL))) {
17541764
ORTE_ERROR_LOG(rc);
@@ -1917,7 +1927,8 @@ static void recv_dfs_cmd(int status, orte_process_name_t* sender,
19171927
"%s getf-cmd: returning %d maps with %d bytes to sender %s",
19181928
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nmaps,
19191929
(int)answer->bytes_used, ORTE_NAME_PRINT(sender));
1920-
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
1930+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1931+
sender, answer,
19211932
ORTE_RML_TAG_DFS_DATA,
19221933
orte_rml_send_callback, NULL))) {
19231934
ORTE_ERROR_LOG(rc);
@@ -2172,7 +2183,8 @@ static void remote_open(int fd, short args, void *cbdata)
21722183
return;
21732184
}
21742185
/* send it */
2175-
if (0 > (rc = orte_rml.send_buffer_nb(&req->trk->requestor, answer,
2186+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
2187+
&req->trk->requestor, answer,
21762188
ORTE_RML_TAG_DFS_DATA,
21772189
orte_rml_send_callback, NULL))) {
21782190
ORTE_ERROR_LOG(rc);
@@ -2213,7 +2225,8 @@ static void remote_size(int fd, short args, void *cbdata)
22132225
return;
22142226
}
22152227
/* send it */
2216-
if (0 > (rc = orte_rml.send_buffer_nb(&req->trk->requestor, answer,
2228+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
2229+
&req->trk->requestor, answer,
22172230
ORTE_RML_TAG_DFS_DATA,
22182231
orte_rml_send_callback, NULL))) {
22192232
ORTE_ERROR_LOG(rc);
@@ -2277,7 +2290,8 @@ static void remote_seek(int fd, short args, void *cbdata)
22772290
return;
22782291
}
22792292
/* send it */
2280-
if (0 > (rc = orte_rml.send_buffer_nb(&req->trk->requestor, answer,
2293+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
2294+
&req->trk->requestor, answer,
22812295
ORTE_RML_TAG_DFS_DATA,
22822296
orte_rml_send_callback, NULL))) {
22832297
ORTE_ERROR_LOG(rc);
@@ -2344,7 +2358,8 @@ static void remote_read(int fd, short args, void *cbdata)
23442358
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
23452359
(long)bytes_read,
23462360
ORTE_NAME_PRINT(&req->trk->requestor));
2347-
if (0 > (rc = orte_rml.send_buffer_nb(&req->trk->requestor, answer,
2361+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
2362+
&req->trk->requestor, answer,
23482363
ORTE_RML_TAG_DFS_DATA,
23492364
orte_rml_send_callback, NULL))) {
23502365
ORTE_ERROR_LOG(rc);

orte/mca/dfs/test/dfs_test.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
33
* All rights reserved.
4-
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
4+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
55
* Copyright (c) 2014-2015 Research Organization for Information Science
66
* and Technology (RIST). All rights reserved.
77
* $COPYRIGHT$
@@ -529,7 +529,8 @@ static void process_opens(int fd, short args, void *cbdata)
529529
ORTE_NAME_PRINT(&daemon),
530530
filename);
531531
/* send it */
532-
if (0 > (rc = orte_rml.send_buffer_nb(&daemon, buffer,
532+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
533+
&daemon, buffer,
533534
ORTE_RML_TAG_DFS_CMD,
534535
orte_rml_send_callback, NULL))) {
535536
ORTE_ERROR_LOG(rc);
@@ -637,7 +638,8 @@ static void process_close(int fd, short args, void *cbdata)
637638
ORTE_NAME_PRINT(&trk->host_daemon),
638639
trk->local_fd);
639640
/* send it */
640-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
641+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
642+
&trk->host_daemon, buffer,
641643
ORTE_RML_TAG_DFS_CMD,
642644
orte_rml_send_callback, NULL))) {
643645
ORTE_ERROR_LOG(rc);
@@ -737,7 +739,8 @@ static void process_sizes(int fd, short args, void *cbdata)
737739
ORTE_NAME_PRINT(&trk->host_daemon),
738740
trk->local_fd);
739741
/* send it */
740-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
742+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
743+
&trk->host_daemon, buffer,
741744
ORTE_RML_TAG_DFS_CMD,
742745
orte_rml_send_callback, NULL))) {
743746
ORTE_ERROR_LOG(rc);
@@ -847,7 +850,8 @@ static void process_seeks(int fd, short args, void *cbdata)
847850
ORTE_NAME_PRINT(&trk->host_daemon),
848851
trk->local_fd);
849852
/* send it */
850-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
853+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
854+
&trk->host_daemon, buffer,
851855
ORTE_RML_TAG_DFS_CMD,
852856
orte_rml_send_callback, NULL))) {
853857
ORTE_ERROR_LOG(rc);
@@ -943,7 +947,8 @@ static void process_reads(int fd, short args, void *cbdata)
943947
ORTE_NAME_PRINT(&trk->host_daemon),
944948
trk->local_fd);
945949
/* send it */
946-
if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer,
950+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
951+
&trk->host_daemon, buffer,
947952
ORTE_RML_TAG_DFS_CMD,
948953
orte_rml_send_callback, NULL))) {
949954
ORTE_ERROR_LOG(rc);
@@ -1010,7 +1015,8 @@ static void process_posts(int fd, short args, void *cbdata)
10101015
goto error;
10111016
}
10121017
/* send it */
1013-
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buffer,
1018+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1019+
ORTE_PROC_MY_DAEMON, buffer,
10141020
ORTE_RML_TAG_DFS_CMD,
10151021
orte_rml_send_callback, NULL))) {
10161022
ORTE_ERROR_LOG(rc);
@@ -1071,7 +1077,8 @@ static void process_getfm(int fd, short args, void *cbdata)
10711077
goto error;
10721078
}
10731079
/* send it */
1074-
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buffer,
1080+
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
1081+
ORTE_PROC_MY_DAEMON, buffer,
10751082
ORTE_RML_TAG_DFS_CMD,
10761083
orte_rml_send_callback, NULL))) {
10771084
ORTE_ERROR_LOG(rc);

0 commit comments

Comments
 (0)