Skip to content

Commit 73ffac5

Browse files
committed
pml/ob1: detect unreachable errors
This commit adds code to detect unreachable procs when using the dynamic add_procs functionality.

Fixes open-mpi#1501

Signed-off-by: Nathan Hjelm <[email protected]>
(cherry picked from open-mpi/ompi@9d5eeec)
Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 4bdd64b commit 73ffac5

File tree

3 files changed

+12
-0
lines changed

3 files changed

+12
-0
lines changed

ompi/mca/bml/r2/bml_r2.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,8 @@ static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
410410
}
411411

412412
if (!btl_in_use) {
413+
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
414+
OBJ_RELEASE(bml_endpoint);
413415
/* no btl is available for this proc */
414416
if (mca_bml_r2.show_unreach_errors) {
415417
opal_show_help ("help-mca-bml-r2.txt", "unreachable proc", true,

ompi/mca/pml/ob1/pml_ob1_isend.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ int mca_pml_ob1_isend(const void *buf,
140140
int16_t seqn;
141141
int rc;
142142

143+
if (OPAL_UNLIKELY(NULL == endpoint)) {
144+
return OMPI_ERR_UNREACH;
145+
}
146+
143147
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
144148

145149
if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
@@ -189,6 +193,10 @@ int mca_pml_ob1_send(const void *buf,
189193
int16_t seqn;
190194
int rc;
191195

196+
if (OPAL_UNLIKELY(NULL == endpoint)) {
197+
return OMPI_ERR_UNREACH;
198+
}
199+
192200
if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == sendmode)) {
193201
/* large buffered sends *need* a real request so use isend instead */
194202
ompi_request_t *brequest;

ompi/mca/pml/ob1/pml_ob1_recvreq.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,8 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
435435
mca_bml_base_btl_t* bml_btl;
436436
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
437437

438+
assert (NULL != endpoint);
439+
438440
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
439441
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
440442
if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,

0 commit comments

Comments
 (0)