Skip to content

Commit c8608a3

Browse files
authored
Merge pull request #8504 from bosilca/fix/no_new_connections_during_handshake
Prevent the establishment of new BTL connections during matching
2 parents f1e962b + 916c29a commit c8608a3

File tree

3 files changed

+24
-14
lines changed

3 files changed

+24
-14
lines changed

ompi/mca/pml/ob1/pml_ob1_recvfrag.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ int mca_pml_ob1_revoke_comm( struct ompi_communicator_t* ompi_comm, bool coll_on
423423
"ob1_revoke_comm: sending NACK to %d", hdr->hdr_rndv.hdr_match.hdr_src));
424424
/* Send a ACK with a NULL request to signify revocation */
425425
proc = mca_pml_ob1_peer_lookup(ompi_comm, hdr->hdr_rndv.hdr_match.hdr_src);
426-
mca_pml_ob1_recv_request_ack_send(proc->ompi_proc, hdr->hdr_rndv.hdr_src_req.lval, NULL, 0, 0, false);
426+
mca_pml_ob1_recv_request_ack_send(NULL, proc->ompi_proc, hdr->hdr_rndv.hdr_src_req.lval, NULL, 0, 0, false);
427427
}
428428
else {
429429
/* if it's a TYPE_MATCH, the sender is not expecting anything
@@ -605,7 +605,7 @@ void mca_pml_ob1_recv_frag_callback_match (mca_btl_base_module_t *btl,
605605
);
606606
}
607607

608-
/* no need to check if complete we know we are.. */
608+
/* no need to check if complete we know we are. */
609609
/* don't need a rmb as that is for checking */
610610
recv_request_pml_complete(match);
611611
}
@@ -1074,7 +1074,7 @@ static int mca_pml_ob1_recv_frag_match (mca_btl_base_module_t *btl,
10741074
MCA_PML_OB1_HDR_TYPE_RNDV == hdr->hdr_common.hdr_type );
10751075
/* Send a ACK with a NULL request to signify revocation */
10761076
mca_pml_ob1_rendezvous_hdr_t* hdr_rndv = (mca_pml_ob1_rendezvous_hdr_t*) hdr;
1077-
mca_pml_ob1_recv_request_ack_send(proc->ompi_proc, hdr_rndv->hdr_src_req.lval, NULL, 0, 0, false);
1077+
mca_pml_ob1_recv_request_ack_send(NULL, proc->ompi_proc, hdr_rndv->hdr_src_req.lval, NULL, 0, 0, false);
10781078
OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "Recvfrag: comm %d is revoked or collectives force errors, sending a NACK to the RDV/RGET match from %d\n", hdr->hdr_ctx, hdr->hdr_src));
10791079
}
10801080
else {

ompi/mca/pml/ob1/pml_ob1_recvreq.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ int mca_pml_ob1_recv_request_ack_send_btl(
285285

286286
static int mca_pml_ob1_recv_request_ack(
287287
mca_pml_ob1_recv_request_t* recvreq,
288+
mca_btl_base_module_t* btl,
288289
mca_pml_ob1_rendezvous_hdr_t* hdr,
289290
size_t bytes_received)
290291
{
@@ -345,12 +346,12 @@ static int mca_pml_ob1_recv_request_ack(
345346

346347
/* let know to shedule function there is no need to put ACK flag. If not all message went over
347348
* RDMA then we cancel the GET protocol in order to switch back to send/recv. In this case send
348-
* back the remote send request, the peer kept a poointer to the frag locally. In the future we
349+
* back the remote send request, the peer kept a pointer to the frag locally. In the future we
349350
* might want to cancel the fragment itself, in which case we will have to send back the remote
350351
* fragment instead of the remote request.
351352
*/
352353
recvreq->req_ack_sent = true;
353-
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval,
354+
return mca_pml_ob1_recv_request_ack_send(btl, proc, hdr->hdr_src_req.lval,
354355
recvreq, recvreq->req_send_offset, 0,
355356
recvreq->req_send_offset == bytes_received);
356357
}
@@ -386,7 +387,7 @@ static int mca_pml_ob1_recv_request_get_frag_failed (mca_pml_ob1_rdma_frag_t *fr
386387
}
387388

388389
/* tell peer to fall back on send for this region */
389-
rc = mca_pml_ob1_recv_request_ack_send(proc, frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req.lval,
390+
rc = mca_pml_ob1_recv_request_ack_send(NULL, proc, frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req.lval,
390391
recvreq, frag->rdma_offset, frag->rdma_length, false);
391392
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
392393
return rc;
@@ -710,7 +711,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
710711
if (mca_pml_ob1_cuda_need_buffers(recvreq, btl))
711712
#endif /* OPAL_CUDA_SUPPORT */
712713
{
713-
mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv, 0);
714+
mca_pml_ob1_recv_request_ack(recvreq, btl, &hdr->hdr_rndv, 0);
714715
return;
715716
}
716717
}
@@ -853,7 +854,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
853854
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
854855
recvreq->req_rdma_offset = bytes_received;
855856
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match);
856-
mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv, bytes_received);
857+
mca_pml_ob1_recv_request_ack(recvreq, btl, &hdr->hdr_rndv, bytes_received);
857858
/**
858859
* The PUT protocol do not attach any data to the original request.
859860
* Therefore, we might want to avoid unpacking if there is nothing to

ompi/mca/pml/ob1/pml_ob1_recvreq.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -431,21 +431,30 @@ int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc,
431431
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
432432
uint64_t hdr_rdma_offset, uint64_t size, bool nordma);
433433

434-
static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
435-
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
436-
uint64_t size, bool nordma)
434+
static inline int
435+
mca_pml_ob1_recv_request_ack_send(mca_btl_base_module_t* btl,
436+
ompi_proc_t* proc,
437+
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
438+
uint64_t size, bool nordma)
437439
{
438440
size_t i;
439441
mca_bml_base_btl_t* bml_btl;
440442
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
441443

442444
assert (NULL != endpoint);
443445

446+
/**
447+
* If a btl has been requested then send the ack using that specific device, otherwise
448+
* we are free to pick one. We need to force the ack to go over a specific BTL, in order
449+
* to prevent the establishement of new connections during the matching handshake.
450+
*/
444451
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
445452
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
446-
if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
447-
hdr_dst_req, hdr_send_offset, size, nordma) == OMPI_SUCCESS)
448-
return OMPI_SUCCESS;
453+
if( (NULL == btl) || (btl == bml_btl->btl) ) {
454+
if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
455+
hdr_dst_req, hdr_send_offset, size, nordma) == OMPI_SUCCESS)
456+
return OMPI_SUCCESS;
457+
}
449458
}
450459

451460
MCA_PML_OB1_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,

0 commit comments

Comments
 (0)