Skip to content

Commit b8c5d1a

Browse files
author
Ralph Castain
committed
Update the routed components, as we no longer need to call init_routes. Fixes the case of direct launch via srun.
Signed-off-by: Ralph Castain <[email protected]> Signed-off-by: Ralph Castain <[email protected]>
1 parent 30298cc commit b8c5d1a

File tree

12 files changed

+151
-770
lines changed

12 files changed

+151
-770
lines changed

orte/mca/ess/base/ess_base_std_app.c

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "orte/mca/grpcomm/base/base.h"
5555
#include "orte/mca/oob/base/base.h"
5656
#include "orte/mca/rml/rml.h"
57+
#include "orte/mca/rml/base/rml_contact.h"
5758
#include "orte/mca/odls/odls_types.h"
5859
#include "orte/mca/filem/base/base.h"
5960
#include "orte/mca/errmgr/base/base.h"
@@ -148,6 +149,17 @@ int orte_ess_base_app_setup(bool db_restrict_local)
148149
"output-", NULL, NULL);
149150
}
150151
/* Setup the communication infrastructure */
152+
/* Routed system */
153+
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
154+
ORTE_ERROR_LOG(ret);
155+
error = "orte_routed_base_open";
156+
goto error;
157+
}
158+
if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
159+
ORTE_ERROR_LOG(ret);
160+
error = "orte_routed_base_select";
161+
goto error;
162+
}
151163
/*
152164
* OOB Layer
153165
*/
@@ -172,29 +184,40 @@ int orte_ess_base_app_setup(bool db_restrict_local)
172184
error = "orte_rml_base_select";
173185
goto error;
174186
}
187+
/* if we have info on the HNP and local daemon, process it */
188+
if (NULL != orte_process_info.my_hnp_uri) {
189+
/* we have to set the HNP's name, even though we won't route messages directly
190+
* to it. This is required to ensure that we -do- send messages to the correct
191+
* HNP name
192+
*/
193+
if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
194+
ORTE_PROC_MY_HNP, NULL))) {
195+
ORTE_ERROR_LOG(ret);
196+
error = "orte_rml_parse_HNP";
197+
goto error;
198+
}
199+
}
200+
if (NULL != orte_process_info.my_daemon_uri) {
201+
/* extract the daemon's name so we can update the routing table */
202+
if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
203+
ORTE_PROC_MY_DAEMON, NULL))) {
204+
ORTE_ERROR_LOG(ret);
205+
error = "orte_rml_parse_daemon";
206+
goto error;
207+
}
208+
/* Set the contact info in the RML - this won't actually establish
209+
* the connection, but just tells the RML how to reach the daemon
210+
* if/when we attempt to send to it
211+
*/
212+
orte_rml.set_contact_info(orte_process_info.my_daemon_uri);
213+
}
214+
175215
/* setup the errmgr */
176216
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
177217
ORTE_ERROR_LOG(ret);
178218
error = "orte_errmgr_base_select";
179219
goto error;
180220
}
181-
/* Routed system */
182-
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
183-
ORTE_ERROR_LOG(ret);
184-
error = "orte_routed_base_open";
185-
goto error;
186-
}
187-
if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
188-
ORTE_ERROR_LOG(ret);
189-
error = "orte_routed_base_select";
190-
goto error;
191-
}
192-
/* setup the routed info */
193-
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(NULL, ORTE_PROC_MY_NAME->jobid, NULL))) {
194-
ORTE_ERROR_LOG(ret);
195-
error = "orte_routed.init_routes";
196-
goto error;
197-
}
198221

199222
/* get a conduit for our use - we never route IO over fabric */
200223
OBJ_CONSTRUCT(&transports, opal_list_t);

orte/mca/ess/base/ess_base_std_orted.c

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
#include "orte/mca/rtc/base/base.h"
4949
#include "orte/mca/rml/base/base.h"
50+
#include "orte/mca/rml/base/rml_contact.h"
5051
#include "orte/mca/routed/base/base.h"
5152
#include "orte/mca/routed/routed.h"
5253
#include "orte/mca/oob/base/base.h"
@@ -386,6 +387,17 @@ int orte_ess_base_orted_setup(char **hosts)
386387
}
387388

388389
/* Setup the communication infrastructure */
390+
/* Routed system */
391+
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
392+
ORTE_ERROR_LOG(ret);
393+
error = "orte_routed_base_open";
394+
goto error;
395+
}
396+
if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
397+
ORTE_ERROR_LOG(ret);
398+
error = "orte_routed_base_select";
399+
goto error;
400+
}
389401
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
390402
ORTE_ERROR_LOG(ret);
391403
error = "orte_oob_base_open";
@@ -407,6 +419,21 @@ int orte_ess_base_orted_setup(char **hosts)
407419
goto error;
408420
}
409421

422+
if (NULL != orte_process_info.my_hnp_uri) {
423+
/* extract the HNP's name so we can update the routing table */
424+
if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
425+
ORTE_PROC_MY_HNP, NULL))) {
426+
ORTE_ERROR_LOG(ret);
427+
error = "orte_rml_parse_HNP";
428+
goto error;
429+
}
430+
/* Set the contact info in the RML - this won't actually establish
431+
* the connection, but just tells the RML how to reach the HNP
432+
* if/when we attempt to send to it
433+
*/
434+
orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
435+
}
436+
410437
/* setup the PMIx server */
411438
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
412439
ORTE_ERROR_LOG(ret);
@@ -420,23 +447,6 @@ int orte_ess_base_orted_setup(char **hosts)
420447
error = "orte_errmgr_base_select";
421448
goto error;
422449
}
423-
/* Routed system */
424-
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
425-
ORTE_ERROR_LOG(ret);
426-
error = "orte_rml_base_open";
427-
goto error;
428-
}
429-
if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
430-
ORTE_ERROR_LOG(ret);
431-
error = "orte_routed_base_select";
432-
goto error;
433-
}
434-
/* setup the routed info */
435-
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(NULL, ORTE_PROC_MY_NAME->jobid, NULL))) {
436-
ORTE_ERROR_LOG(ret);
437-
error = "orte_routed.init_routes";
438-
goto error;
439-
}
440450

441451
/* get a conduit for our use - we never route IO over fabric */
442452
OBJ_CONSTRUCT(&transports, opal_list_t);
@@ -533,12 +543,6 @@ int orte_ess_base_orted_setup(char **hosts)
533543
}
534544
}
535545

536-
/* setup the routed info */
537-
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(NULL, ORTE_PROC_MY_NAME->jobid, NULL))) {
538-
ORTE_ERROR_LOG(ret);
539-
error = "orte_routed.init_routes";
540-
goto error;
541-
}
542546
/* setup I/O forwarding system - must come after we init routes */
543547
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
544548
ORTE_ERROR_LOG(ret);

orte/mca/ess/base/ess_base_std_tool.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,6 @@ int orte_ess_base_tool_setup(void)
157157
goto error;
158158
}
159159

160-
/* setup the routed info for all components */
161-
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(NULL, ORTE_PROC_MY_NAME->jobid, NULL))) {
162-
ORTE_ERROR_LOG(ret);
163-
error = "orte_routed.init_routes";
164-
goto error;
165-
}
166-
167160
/* setup I/O forwarding system - must come after we init routes */
168161
if (NULL != orte_process_info.my_hnp_uri) {
169162
/* only do this if we were given an HNP */

orte/mca/ess/hnp/ess_hnp_module.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -633,13 +633,6 @@ static int rte_init(void)
633633
/* set the event base */
634634
opal_pmix_base_set_evbase(orte_event_base);
635635

636-
/* setup the routed info */
637-
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(NULL, ORTE_PROC_MY_NAME->jobid, NULL))) {
638-
ORTE_ERROR_LOG(ret);
639-
error = "orte_routed.init_routes";
640-
goto error;
641-
}
642-
643636
/* setup the PMIx server */
644637
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
645638
/* the server code already barked, so let's be quiet */

orte/mca/routed/base/base.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,6 @@ ORTE_DECLSPEC int orte_routed_base_update_route(char *module, orte_process_name_
5353
orte_process_name_t *route);
5454
ORTE_DECLSPEC orte_process_name_t orte_routed_base_get_route(char *module,
5555
orte_process_name_t *target);
56-
ORTE_DECLSPEC int orte_routed_base_init_routes(char *module,
57-
orte_jobid_t job, opal_buffer_t *ndat);
5856
ORTE_DECLSPEC int orte_routed_base_route_lost(char *module,
5957
const orte_process_name_t *route);
6058
ORTE_DECLSPEC bool orte_routed_base_route_is_defined(char *module,

orte/mca/routed/base/routed_base_fns.c

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -125,25 +125,6 @@ orte_process_name_t orte_routed_base_get_route(char *module, orte_process_name_t
125125
return *ORTE_NAME_INVALID;
126126
}
127127

128-
int orte_routed_base_init_routes(char *module,
129-
orte_jobid_t job, opal_buffer_t *ndat)
130-
{
131-
orte_routed_base_active_t *active;
132-
int rc;
133-
134-
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
135-
if (NULL == module ||
136-
0 == strcmp(module, active->component->base_version.mca_component_name)) {
137-
if (NULL != active->module->init_routes) {
138-
if (ORTE_SUCCESS != (rc = active->module->init_routes(job, ndat))) {
139-
return rc;
140-
}
141-
}
142-
}
143-
}
144-
return ORTE_SUCCESS;
145-
}
146-
147128
int orte_routed_base_route_lost(char *module, const orte_process_name_t *route)
148129
{
149130
orte_routed_base_active_t *active;

orte/mca/routed/base/routed_base_frame.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ orte_routed_API_t orte_routed = {
4545
.delete_route = orte_routed_base_delete_route,
4646
.update_route = orte_routed_base_update_route,
4747
.get_route = orte_routed_base_get_route,
48-
.init_routes = orte_routed_base_init_routes,
4948
.route_lost = orte_routed_base_route_lost,
5049
.route_is_defined = orte_routed_base_route_is_defined,
5150
.set_lifeline = orte_routed_base_set_lifeline,

0 commit comments

Comments
 (0)