Skip to content

Commit 8f52696

Browse files
author
Ralph Castain
committed
Do not hang if we cannot relay messages. Eliminate extra error log message.
Signed-off-by: Ralph Castain <[email protected]>
1 parent dea9ef2 commit 8f52696

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

opal/mca/pmix/base/pmix_base_fns.c

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
152152
rc = opal_pmix.publish(&ilist);
153153
OPAL_LIST_DESTRUCT(&ilist);
154154
if (OPAL_SUCCESS != rc) {
155-
OPAL_ERROR_LOG(rc);
156155
return rc;
157156
}
158157
} else {
@@ -161,7 +160,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
161160
caddy.pdat = NULL;
162161
rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy);
163162
if (OPAL_SUCCESS != rc) {
164-
OPAL_ERROR_LOG(rc);
165163
OPAL_LIST_DESTRUCT(&ilist);
166164
return rc;
167165
}
@@ -213,7 +211,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
213211
OPAL_LIST_DESTRUCT(&mlist);
214212
OPAL_LIST_DESTRUCT(&ilist);
215213
if (OPAL_SUCCESS != rc) {
216-
OPAL_ERROR_LOG(rc);
217214
return rc;
218215
}
219216
} else {
@@ -224,7 +221,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
224221
opal_argv_append_nosize(&keys, pdat->value.key);
225222
rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy);
226223
if (OPAL_SUCCESS != rc) {
227-
OPAL_ERROR_LOG(rc);
228224
OPAL_LIST_DESTRUCT(&mlist);
229225
opal_argv_free(keys);
230226
return rc;

orte/mca/grpcomm/direct/grpcomm_direct.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -526,15 +526,18 @@ static void xcast_recv(int status, orte_process_name_t* sender,
526526
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name));
527527
OBJ_RELEASE(rly);
528528
OBJ_RELEASE(item);
529+
ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH);
529530
continue;
530531
}
531532
if ((ORTE_PROC_STATE_RUNNING < rec->state &&
532533
ORTE_PROC_STATE_CALLED_ABORT != rec->state) ||
533534
!ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE)) {
534-
opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay",
535-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name));
535+
opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay: %s ",
536+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name),
537+
ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE) ? orte_proc_state_to_str(rec->state) : "NOT ALIVE");
536538
OBJ_RELEASE(rly);
537539
OBJ_RELEASE(item);
540+
ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH);
538541
continue;
539542
}
540543
if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(orte_coll_conduit,
@@ -543,6 +546,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
543546
ORTE_ERROR_LOG(ret);
544547
OBJ_RELEASE(rly);
545548
OBJ_RELEASE(item);
549+
ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH);
546550
continue;
547551
}
548552
OBJ_RELEASE(item);

0 commit comments

Comments
 (0)